本文主要是介绍用http协议在rcsb.org自动下载pdb文件,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
请参考rcsb官方提供文档接口
由于Python提供的接口简单易用,所以选择用它来实现
程序流程
1.从thefile.txt 文件中读取蛋白序列
2.然后向RCSB提交BLAST查询,获取XML格式的查询结果
3.解析XML文件
4.下载pdb文件
python代码
可以用 python name.py 直接运行.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import urllib
import urllib2
import os
import xml.sax
import xml.sax.handler

# Base URL of the RCSB file server; "<PDB id>.pdb" is appended to it.
global_url = "https://files.rcsb.org/download/"

# Sequence submitted when setup_connect() is called without an argument.
DEFAULT_SEQUENCE = "TDMLTLTRYVMEKGRQAKGTGELTQLLNSMLTAIKAISSAVRKAGLAHLYGIAGSVNVDQ"


class PdbHandler(xml.sax.ContentHandler):
    """SAX handler for BLAST XML output that downloads one PDB file per hit.

    The text of a handful of elements is stored on same-named attributes.
    Whenever a ``Hit_def`` element closes, its first four characters are
    passed to the downloader as the PDB file name.

    Args:
        downloader: optional callable taking the four-character name.
            Defaults to the module-level ``download_pdb``.
    """

    # XML tag -> name of the attribute that receives the tag's text.
    _FIELD_BY_TAG = {
        "BlastOutput_program": "BlastOutput_program",
        "BlastOutput_version": "BlastOutput_version",
        "BlastOutput_db": "BlastOutput_db",
        "BlastOutput_query-ID": "BlastOutput_query_ID",
        "BlastOutput_query-def": "BlastOutput_query_def",
        "BlastOutput_query-len": "BlastOutput_query_len",
        "Parameters_matrix": "Parameters_matrix",
        "Hit_def": "Hit_def",
    }

    def __init__(self, downloader=None):
        xml.sax.ContentHandler.__init__(self)
        self._download = downloader if downloader is not None else download_pdb
        self.CurrentData = ""
        self.BlastOutput_program = ""
        self.BlastOutput_version = ""
        self.BlastOutput_db = ""
        self.BlastOutput_query_ID = ""
        self.BlastOutput_query_def = ""
        self.BlastOutput_query_len = ""
        # The attributes below are never filled by this handler; they are
        # kept so existing readers of the object still find them.
        self.blastOutput_iterations = ""
        self.blastOutput_query_len = 0
        self.BlastOutput_param = ""
        self.Parameters_matrix = ""
        self.Parameters_expect = ""
        self.Hit_def = ""

    def startElement(self, tag, attributes):
        """Remember the element being read and reset its text buffer."""
        self.CurrentData = tag
        field = self._FIELD_BY_TAG.get(tag)
        if field is not None:
            # Each occurrence (e.g. every Hit_def) starts from empty text.
            setattr(self, field, "")
        if tag == "BlastOutput":
            print("-------start---BlastOutput--")

    def endElement(self, tag):
        """Report the program name and download the file for a finished hit."""
        if tag == "BlastOutput_program":
            print("BlastOutput_program: %s" % self.BlastOutput_program)
        elif tag == "Hit_def":
            pdb_id = self.Hit_def.strip()[0:4]
            if pdb_id:
                print("file name : %s" % pdb_id)
                self._download(pdb_id)
        # Text that follows the closing tag (indentation, newlines) must not
        # be attributed to the element that just ended.
        self.CurrentData = ""

    def characters(self, content):
        """Append a chunk of text to the attribute of the current element."""
        field = self._FIELD_BY_TAG.get(self.CurrentData)
        if field is not None:
            # A parser may deliver one element's text in several chunks, so
            # accumulate instead of overwriting.
            setattr(self, field, getattr(self, field) + content)


def download_pdb(file_name):
    """Download ``<file_name>.pdb`` from ``global_url`` into the ``pdb`` folder.

    The folder is created when missing. Failures are reported on stdout and
    not raised, so one bad entry does not abort a whole parsing run.

    Args:
        file_name: PDB file name without the ``.pdb`` extension.
    """
    url = global_url + file_name + ".pdb"
    out_dir = "pdb"
    try:
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
        # Fetch first, write second: a failed download leaves no empty file.
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        with open(os.path.join(out_dir, file_name + ".pdb"), "wb") as out:
            out.write(data)
    except Exception as e:
        # Deliberately broad: this runs inside a SAX callback and is
        # best-effort, but the cause is reported next to the URL.
        print("download failed: %s (%s)" % (url, e))


def setup_connect(sequence=DEFAULT_SEQUENCE):
    """POST a BLAST query for *sequence* and return the raw response body.

    Args:
        sequence: sequence to search for; defaults to ``DEFAULT_SEQUENCE``.

    Returns:
        The response body as returned by the server (XML is requested).

    Raises:
        urllib2.URLError: if the request fails.
    """
    form = {
        'sequence': sequence,
        'eCutOff': '10.0',
        'matrix': 'BLOSUM62',
        'outputFormat': 'XML',
    }
    # NOTE(review): confirm this REST endpoint is still served by RCSB.
    requrl = "https://www.rcsb.org/pdb/rest/postBLAST"
    req = urllib2.Request(url=requrl, data=urllib.urlencode(form))
    print(req)
    res_data = urllib2.urlopen(req)
    try:
        print(res_data)
        return res_data.read()
    finally:
        res_data.close()


def save_to_file(file_name, contents):
    """Write *contents* to *file_name*, reporting I/O errors on stdout.

    Args:
        file_name: path of the file to (over)write.
        contents: text to store.
    """
    try:
        with open(file_name, 'w') as fh:
            fh.write(contents)
    except (IOError, OSError) as e:
        print("save to file error! (%s)" % e)


def read_file(file_name='thefile.txt'):
    """Return the first whitespace-separated field of every non-blank line.

    Args:
        file_name: path of the sequence list; defaults to ``thefile.txt``.

    Returns:
        A list of strings, in file order.
    """
    with open(file_name) as file_object:
        rows = (line.split() for line in file_object)
        return [fields[0] for fields in rows if fields]


def process_sequence_file(file_name='thefile.txt'):
    """Query RCSB for every sequence in *file_name* and download each hit."""
    for sequence in read_file(file_name):
        xml.sax.parseString(setup_connect(sequence), PdbHandler())


def main():
    """Parse the saved response ``output1.xml`` and download its hits.

    ``output1.xml`` can be produced with
    ``save_to_file('output1.xml', setup_connect())``. To query the server for
    every sequence of ``thefile.txt`` instead, call
    ``process_sequence_file()``.
    """
    print('__main__')
    parser = xml.sax.make_parser()
    # Turn off namespace processing.
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    parser.setContentHandler(PdbHandler())
    parser.parse("output1.xml")


if __name__ == "__main__":
    main()
下面是thefile.txt文件格式,字段之间用空格分隔.
TDMLTLTRYVMEKGRQAKGTGELTQLLNSMLTAIKAISSAVRKAGLAHLYGIAGSVNVDQ 1
下面是我的运行结果,下载了很多PDB文件.
这篇关于用http协议在rcsb.org自动下载pdb文件的文章就介绍到这儿,希望我们推荐的文章对开发者们有所帮助!