本文整理汇总了Python中Bio.SwissProt.parse方法的典型用法代码示例。如果您正苦于以下问题:Python SwissProt.parse方法的具体用法?Python SwissProt.parse怎么用?Python SwissProt.parse使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.SwissProt的用法示例。
在下文中一共展示了SwissProt.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: features
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def features(files):
    """Print selected feature-table entries of every record in a flat file.

    The file at ``files`` is parsed with ``SwissProt.parse``; for each feature
    whose first element is one of the keys listed in ``wanted`` a line of the
    form ``KEY,<f[1]>-<f[2]>,<f[3]>`` is printed to standard output.

    :param files: path of a single flat file (despite the plural name the
        value is passed straight to ``open``).
    :returns: None.
    """
    wanted = frozenset([
        'ZN_FING', 'REGION', 'METAL', 'SITE', 'SIGNAL', 'REPEAT',
        'NP_REGION', 'BINDING', 'MOTIF', 'MOD_RES', 'LIPID', 'DOMAIN',
        'DNA_BIND', 'DISULFID', 'CROSSLNK', 'CARBOHYD', 'CA_BIND',
        'ACT_SITE',
    ])
    # The context manager closes the file even when parsing fails part-way.
    with open(files) as handle:
        for record in SwissProt.parse(handle):
            for feature in record.features:
                # Features are indexed positionally here; presumably
                # (key, from, to, description, ...) -- confirm against the
                # Biopython version in use.
                if feature[0] in wanted:
                    print(feature[0] + ',' + str(feature[1]) + '-'
                          + str(feature[2]) + ',' + feature[3])
示例2: go_in_papers
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def go_in_papers(sp_path):
    """Group the GO annotations found in a flat file by paper.

    To be used with SP data, not GOA.

    Records for which ``get_go_evidence_codes`` returns nothing are skipped.
    For every other record, each paper returned by ``get_papers`` is credited
    with all GO records of that entry.

    :param sp_path: path of the flat file to read.
    :returns: a tuple ``(papers, papers_prots)``:

        * ``papers`` -- key: paper (a PubMed id according to the original
          notes); value: list of dicts with keys ``'sp_id'`` (the record's
          ``entry_name``), ``'go_id'`` (element 0 of a GO record) and
          ``'go_ec'`` (element 1 of a GO record).
        * ``papers_prots`` -- key: paper; value: dict mapping ``entry_name``
          to the number of times that entry was seen for the paper.
    """
    papers = {}
    papers_prots = {}
    # "with" guarantees the handle is released even if parsing raises.
    with open(sp_path) as sph:
        for sp_rec in SP.parse(sph):
            cur_go_recs = get_go_evidence_codes(sp_rec)
            if not cur_go_recs:
                continue
            for paper in get_papers(sp_rec):
                prot_counts = papers_prots.setdefault(paper, {})
                prot_counts[sp_rec.entry_name] = \
                    prot_counts.get(sp_rec.entry_name, 0) + 1
                for cur_go_rec in cur_go_recs:
                    papers.setdefault(paper, []).append(
                        dict(sp_id=sp_rec.entry_name,
                             go_id=cur_go_rec[0],
                             go_ec=cur_go_rec[1]))
    return papers, papers_prots
示例3: _parse_features
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def _parse_features( self ):
    """Dump the features of selected-taxon records to a tab-separated file.

    Reads the input files ``files[11]`` .. ``files[14]`` (relative to the
    module-level ``path``) and writes one line per feature to ``files[16]``:
    the first accession of the record followed by the feature fields, with an
    extra field inserted at position 3.  The original text at position 4 is
    split into that new field and a remainder according to its punctuation.

    NOTE(review): the source indentation was lost; the final ``else`` is read
    as belonging to the if/elif chain -- confirm against the original file.
    """
    print( 'uniprot flat files, to get features...' )
    wanted_taxa = ['9606', '10090', '10116']
    with open( path + files[16], 'wt' ) as outf:
        for j in [11,12,13,14]:
            print( files[j] + '...' )
            with open(path + files[j], 'rt') as handle:
                for record in SwissProt.parse(handle):
                    if record.taxonomy_id[0] not in wanted_taxa:
                        continue
                    # pop(0) removes the first accession from the record's
                    # own list, exactly as the original did.
                    acc = record.accessions.pop(0)
                    for feature in record.features:
                        row = list(feature)
                        row.insert(3, '')
                        text = row[4]
                        if re.search(r'^[^\.]+\.\s*$', text):
                            # "<text>." -> text only, nothing left over.
                            m = re.match(r'^(.+)\.\s*$', text)
                            if m:
                                row[3], row[4] = m.group(1), ''
                        elif re.search(r'.+\.\s+\{', text):
                            # "<text>. {<extra>}." -> text and braces content.
                            m = re.match(r'^(.+)\.\s*\{(.+)\}\.$', text)
                            if m:
                                row[3], row[4] = m.group(1), m.group(2)
                        elif re.search(r'.+\.\s+\/', text):
                            # "<text>. /<extra>." -> text and slash content.
                            m = re.match(r'^(.+)\.\s*\/(.+)\.$', text)
                            if m:
                                row[3], row[4] = m.group(1), m.group(2)
                        else :
                            # Anything else: just strip braces, dots, slashes.
                            row[4] = re.sub(r'[\{\}\.\/]', '', text)
                        outf.write( acc + "\t" + '\t'.join(map(str, row)) + '\n')
示例4: get_genes
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def get_genes (self,gene_name=""):
    """Write the sequences of all records for one gene to ``self.fasta_file``.

    Records are read from ``self.fd`` with ``SwissProt.parse``.  A record
    matches when its ``gene_name`` text, after skipping the first five
    characters (the ``Name=`` prefix), starts with the queried name
    followed by either ``;`` or a space, compared case-insensitively:

      1) ``Name=Rho;``
      2) ``Name=rho {ECO.....}``

    Gene-name lines that do not start with ``Name=...`` are not considered.

    :param gene_name: gene symbol to look for; with the default empty string
        the method does nothing (no file is created).
    :returns: None.
    """
    if gene_name == "":
        return
    print("Finding \"{}\" gene in Uniprot database...".format(gene_name))
    upper_name = gene_name.upper()  # Rho --> RHO
    accepted = (upper_name + " ", upper_name + ";")
    # The context manager closes the FASTA file even if parsing fails.
    with open(self.fasta_file, "w") as output_handle:
        for record in SwissProt.parse(self.fd):
            # One character more than the query, so that the terminator
            # (";" or " ") is part of the comparison: Name=Rhodop; --> RHOD
            candidate = record.gene_name[5:5 + len(upper_name) + 1].upper()
            if candidate in accepted:
                print("Add protein to fasta file: " + record.entry_name
                      + ", ...." + record.gene_name)
                # NOTE(review): if record.sequence is a plain str,
                # .format("fasta") returns it unchanged -- kept as in the
                # original; confirm the intended type.
                output_handle.write(">" + record.entry_name + "\n"
                                    + record.sequence.format("fasta") + "\n")
示例5: load_uniprot
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def load_uniprot(self):
    """Load every record of 'uniprot.txt' into ``self.uniprot``.

    ``self.uniprot`` is reset to None first and stays None when
    ``self.exists('uniprot.txt')`` is false; otherwise it becomes the list of
    records parsed from the handle returned by ``self.open('uniprot.txt')``.
    """
    self.uniprot = None
    if self.exists('uniprot.txt'):
        with self.open('uniprot.txt') as fp:
            self.uniprot = []
            # extend() consumes the parser lazily, one record at a time.
            self.uniprot.extend(SwissProt.parse(fp))
示例6: _parse_flat_files
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def _parse_flat_files( self ):
    """Flatten selected-taxon records into one tab-separated line each.

    Reads the input files ``files[11]`` .. ``files[14]`` (relative to the
    module-level ``path``) and writes to ``files[15]``.  Only records whose
    first taxonomy id is '9606', '10090' or '10116' are kept.  Columns, in
    order: first accession, entry name, source db ('tr' when the input file
    name contains 'trembl', else 'sp'), taxonomy id, data class, gene name,
    RecName/SubName full name, AltName full name, AltName short name, flags,
    remaining accessions, GeneID ids, RefSeq ids (version suffix removed),
    HGNC ids, MGI ids, then database names / ids / third fields of the cross
    references whose database is in the module-level ``xdoms``, the first
    ``seqinfo`` element, and the sequence.  List-valued columns are joined
    with '|'.
    """
    def first_group(pattern, text):
        # The flag has to be passed by keyword: the fourth positional
        # argument of re.sub is ``count``, so the original call
        # re.sub(p, r, s, re.IGNORECASE) never enabled case-insensitivity.
        return re.sub(pattern, r'\1', text, flags=re.IGNORECASE).strip()

    print( 'uniprot flat files...' )
    with open( path + files[15], 'wt' ) as outf:
        for j in [11,12,13,14]:
            print( files[j] + '...' )
            # Depends only on the file name, so decide once per file.
            srcdb = 'tr' if re.search(r'trembl', files[j]) else 'sp'
            with open(path + files[j], 'rt') as handle:
                for record in SwissProt.parse(handle):
                    taxid = record.taxonomy_id[0]
                    if taxid not in ('9606', '10090', '10116'):
                        continue
                    accs = record.accessions
                    # Removes the primary accession; ``accs`` then holds the
                    # remaining ones written to the "other accessions" column.
                    acc = accs.pop(0)
                    rev = record.data_class
                    gname = re.sub(r'.*Name=([^;{]+)[{;].*', r'\1', record.gene_name).strip()
                    uid = record.entry_name
                    seq = record.sequence
                    sinfo = str(record.seqinfo[0])
                    description = record.description

                    rname = fname = sname = flag_text = ''
                    if 'RecName' in description:
                        rname = first_group(r'.*RecName: *Full=([^;{]+)[{;].*', description)
                    elif 'SubName' in description:
                        rname = first_group(r'.*SubName: *Full=([^;{]+) *[;{].*', description)
                    if 'AltName' in description:
                        if re.search(r'AltName:[^:]*Full=', description, re.IGNORECASE):
                            fname = first_group(r'.*AltName:[^:]*Full=([^;{]+)[{;].*', description)
                        if re.search(r'AltName:[^:]*Short=', description, re.IGNORECASE):
                            sname = first_group(r'.*AltName:[^:]*Short=([^;{]+)[{;].*', description)
                    if 'Flags:' in description:
                        flag_text = first_group(r'.*Flags: *([^;]+);.*', description)

                    refs, eids, mgis, hgnc = [], [], [], []
                    dids, dnms, ddbs = [], [], []
                    for xref in record.cross_references:
                        db = xref[0]
                        # Independent tests (not elif): ``xdoms`` may also
                        # contain one of the databases handled above.
                        if db == 'GeneID':
                            eids.append(xref[1])
                        if db == 'RefSeq':
                            refs.append(re.sub(r'\.\d+$', r'', xref[1]))
                        if db == 'MGI':
                            mgis.append(xref[1])
                        if db == 'HGNC':
                            hgnc.append(xref[1])
                        if db in xdoms:
                            dids.append(xref[1])
                            ddbs.append(db)
                            dnms.append(xref[2])
                    outf.write( '\t'.join([ acc, uid, srcdb, taxid, rev, gname, rname, fname, sname, flag_text, '|'.join(accs), '|'.join(eids), '|'.join(refs), '|'.join(hgnc), '|'.join(mgis), '|'.join(ddbs), '|'.join(dids), '|'.join(dnms), sinfo, seq ]) + '\n' )
示例7: obtain_taxons
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def obtain_taxons(self, protein_dict, fh_sprot):
    """Fill ``protein_dict`` with the taxonomy ids of the proteins it lists.

    Every record parsed from ``fh_sprot`` is scanned; for the first of its
    accessions that is a key of ``protein_dict`` the value is replaced by the
    record's ``taxonomy_id`` list.  The whole file is always read.

    :param protein_dict: dict keyed by accession; updated in place.
    :param fh_sprot: open handle passed to ``sp.parse``.
    :returns: the same ``protein_dict`` object.
    """
    for rec in sp.parse(fh_sprot):
        for accession in rec.accessions:
            if accession in protein_dict:
                # Assign the record's taxonomy_id list to the protein; one
                # matching accession per record is enough.
                protein_dict[accession] = rec.taxonomy_id
                break
    return protein_dict
示例8: __init__
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def __init__(self, sprot_cache='', trembl_cache='', organism='homo sapien'):
    """Set up the record index, optionally pre-loaded from a flat file.

    :param sprot_cache: path of a flat file; when non-empty, each of its
        records is stored in ``self.records`` under every one of its
        accessions.  An unreadable path is reported on stdout, not raised.
    :param trembl_cache: accepted but not used by the code visible here.
    :param organism: stored stripped and lower-cased in ``self.organism``.
    """
    self.records = {}
    self.organism = organism.strip().lower()
    if sprot_cache:
        # Load the swissprot records if file can be found
        try:
            with open(sprot_cache) as fp:
                for record in SwissProt.parse(fp):
                    for accession in record.accessions:
                        self.records[accession] = record
        except IOError as e:  # "as" form works on Python 2.6+ and 3
            print(e)
            print("SwissProt cache not loaded")
示例9: __build_NEXP_accession_singleSpecies
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def __build_NEXP_accession_singleSpecies(fh_sprot, taxon_id, ontType, EXP_default=set([])):
    '''
    Collect the accession lists of the proteins that, for the ontology
    type ontType, carry at least one GO cross reference but none whose
    evidence code is in EXP_default.

    Only records parsed from fh_sprot whose taxonomy_id contains taxon_id
    are considered. Each selected record contributes its whole accessions
    list as one element of the returned list.
    '''
    nexp_accessions = []
    print(' Building the accession list with the proteins ' +
          'that have only non-EXP evidence codes at time t1 ...')
    for rec in sp.parse(fh_sprot):
        # Keep only the records of the requested taxon.
        if taxon_id not in rec.taxonomy_id:
            continue
        # Becomes True as soon as any GO entry of this ontology is seen.
        annotated_in_ontology = False
        for xref in rec.cross_references:
            if xref[0] != 'GO':
                continue
            # Evidence code: text before the first ':' of field 3;
            # ontology letter: first character of field 2.
            evidence = xref[3].split(':')[0]
            aspect = xref[2][0]
            if aspect != ontType:
                continue
            annotated_in_ontology = True
            if evidence in EXP_default:
                # An EXP code disqualifies the protein; skip the else below.
                break
        else:
            # Loop ended without finding an EXP code for this ontology.
            if annotated_in_ontology:
                nexp_accessions.append(rec.accessions)
    return nexp_accessions
示例10: obtain_goterms
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def obtain_goterms(self, goterm_dict, fh_sprot):
    """Add the GO cross references of listed proteins to ``goterm_dict``.

    For each record parsed from ``fh_sprot``, the first accession that is a
    key of ``goterm_dict`` receives one tuple per 'GO' cross reference:
    ``(xref[1], text of xref[3] before the first ':', first char of xref[2])``
    -- presumably GO id, evidence code and ontology letter.

    NOTE(review): the source indentation was lost; the ``break`` is read as
    ending the accession loop after the first matching accession -- confirm
    against the original file.

    :param goterm_dict: dict keyed by accession whose values support
        ``.add`` (e.g. sets); updated in place.
    :param fh_sprot: open handle passed to ``sp.parse``.
    :returns: the same ``goterm_dict`` object.
    """
    for rec in sp.parse(fh_sprot):
        for accession in rec.accessions:
            if accession in goterm_dict:
                terms = goterm_dict[accession]
                for crossRef in rec.cross_references:
                    if crossRef[0] == 'GO':
                        terms.add((crossRef[1],
                                   crossRef[3].split(':')[0],
                                   crossRef[2][0]))
                # One matching accession per record is enough.
                break
    return goterm_dict
示例11: UNIPROT_GENE_PLUS
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def UNIPROT_GENE_PLUS(UNIPROT):
    """Return the gene name, and its synonyms if any, of a UniProt entry.

    LIST - The difference between this and UNIPROT_GENE is that
    UNIPROT_GENE_PLUS returns synonym genes as well, if any, after the gene
    name found in the first field.

    The entry is downloaded from uniprot.org as a flat file.  ``gene_name``
    is split on ';': the value of the first field is always collected; when
    there are more than two pieces, the comma-separated values of the second
    field are collected too, with all whitespace removed.

    NOTE(review): the second field is assumed to hold the synonyms; its key
    is not checked.

    :param UNIPROT: accession inserted into the download URL.
    :returns: list of gene-name strings (empty if no record has a gene name).
    """
    import io
    from Bio import SwissProt
    try:
        from urllib2 import urlopen  # Python 2
        needs_decoding = False
    except ImportError:
        from urllib.request import urlopen  # Python 3
        needs_decoding = True  # responses are bytes on Python 3

    GENES=[]
    response = urlopen("http://www.uniprot.org/uniprot/%s.txt"%UNIPROT)
    try:
        if needs_decoding:
            handle = io.TextIOWrapper(response, encoding="utf-8")
        else:
            handle = response
        for record in SwissProt.parse(handle):
            fields = record.gene_name.split(";")
            if "=" not in fields[0]:
                # No "Key=value" gene name on this record; indexing the
                # split result would raise IndexError.
                continue
            GENES.append(fields[0].split("=")[1])
            if len(fields) > 2:
                for syno in fields[1].split("=")[1].split(","):
                    GENES.append("".join(syno.split()))
    finally:
        # Release the HTTP connection whatever happens during parsing.
        response.close()
    return GENES
示例12: count_genes_with_EXP_old
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def count_genes_with_EXP_old(fh_sprot, taxon_id, EXP_default=set([])):
    """Count, per ontology, the genes with an EXP-supported GO annotation.

    Only records parsed from ``fh_sprot`` whose ``taxonomy_id`` contains
    ``taxon_id`` (such as 559292 for yeast) are considered.  A record counts
    for an ontology when one of its 'GO' cross references has an evidence
    code in ``EXP_default`` and an ontology letter 'P', 'C' or 'F'
    (case-insensitive).

    Returns the tuple ``(exp_bpo_ct, exp_cco_ct, exp_mfo_ct)``, i.e. the
    totals for the letters 'P', 'C' and 'F' in that order.
    """
    totals = {'P': 0, 'C': 0, 'F': 0}
    for rec in sp.parse(fh_sprot):
        if taxon_id not in rec.taxonomy_id:
            continue
        # Ontology letters for which this record has EXP evidence.
        seen = set()
        for xref in rec.cross_references:
            if xref[0] == 'GO':
                # Evidence code: text before the first ':' of field 3;
                # ontology letter: first character of field 2.
                evidence = xref[3].split(':')[0]
                aspect = xref[2][0].upper()
                if evidence in EXP_default and aspect in totals:
                    seen.add(aspect)
            # Nothing more to learn once all three ontologies are covered.
            if len(seen) == 3:
                break
        for aspect in seen:
            totals[aspect] += 1
    return (totals['P'], totals['C'], totals['F'])
示例13: count_GOterms_with_EXP
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def count_GOterms_with_EXP(fh_sprot, taxon_id, EXP_default=set([]), max_proteins=21):
    '''
    Extract, for each gene, the distinct GO terms that are validated by
    any of the experimental evidence codes in EXP_default.

    Only records parsed from fh_sprot whose taxonomy_id contains taxon_id
    (such as 559292 for yeast) are considered. For each of them a set of GO
    terms per ontological category is stored under the record's first
    accession; the set is empty when nothing qualifies.

    max_proteins limits how many matching records are processed. The
    default of 21 reproduces the cut-off that used to be hard-coded
    (``count > 20``); pass None to process the whole file.

    NOTE(review): the source indentation was lost; the counter is read as
    counting matching records only -- confirm against the original file.

    Returns the tuple (mfo_terms, bpo_terms, cco_terms) of three
    OrderedDicts keyed by accession.
    '''
    mfo_terms = OrderedDict()
    bpo_terms = OrderedDict()
    cco_terms = OrderedDict()
    by_aspect = {'F': mfo_terms, 'P': bpo_terms, 'C': cco_terms}
    if max_proteins is not None and max_proteins < 1:
        # Nothing requested: do not touch the handle at all.
        return (mfo_terms, bpo_terms, cco_terms)
    count = 0
    for rec in sp.parse(fh_sprot):
        if taxon_id not in rec.taxonomy_id:
            continue
        protName = rec.accessions[0]
        found = {'F': set(), 'P': set(), 'C': set()}
        for crossRef in rec.cross_references:
            # Consider only the cross references to the GO database.
            if crossRef[0] != 'GO':
                continue
            # Evidence code: text before the first ':' of field 3;
            # ontology letter: first character of field 2.
            evidence = crossRef[3].split(':')[0]
            aspect = crossRef[2][0].upper()
            if evidence in EXP_default and aspect in found:
                found[aspect].add(crossRef[1])
        for aspect, terms in by_aspect.items():
            terms[protName] = found[aspect]
        count += 1
        if max_proteins is not None and count >= max_proteins:
            break
    return (mfo_terms, bpo_terms, cco_terms)
示例14: check_sprot_format
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def check_sprot_format(fh_sprot):
    """
    Check whether the file behind the handle fh_sprot can be parsed as
    UniProtKB/Swissprot format.

    The parser is asked for its first record only. Returns True when that
    succeeds -- or when the parser yields no record at all without raising --
    and False when fetching the first record raises an Exception.

    The handle is advanced by this check; callers that want to parse the
    file afterwards presumably need to rewind or reopen it.
    """
    records = sp.parse(fh_sprot)  # lazy: nothing is read until iteration
    try:
        next(iter(records), None)
    except Exception:
        # Deliberately not a bare "except:", so that KeyboardInterrupt and
        # SystemExit still propagate.
        return False
    return True
示例15: count_genes_with_EXP
# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def count_genes_with_EXP(fh_sprot, taxon_id, EXP_default=set([])):
    """Count, per ontology, the genes with an EXP-supported GO annotation.

    Only records parsed from ``fh_sprot`` whose ``taxonomy_id`` contains
    ``taxon_id`` (such as 559292 for yeast) are considered.  A record counts
    for a category when one of its 'GO' cross references has an evidence
    code in ``EXP_default`` and the matching ontology letter
    (case-insensitive): 'F' -> 'MFO', 'P' -> 'BPO', 'C' -> 'CCO'.

    Returns a dict with the keys 'MFO', 'BPO' and 'CCO' holding the counts.
    """
    category_of = {'F': 'MFO', 'P': 'BPO', 'C': 'CCO'}
    gene_count = {'MFO': 0, 'BPO': 0, 'CCO': 0}
    for rec in sp.parse(fh_sprot):
        if taxon_id not in rec.taxonomy_id:
            continue
        # Categories for which this record has EXP evidence.
        found = set()
        for xref in rec.cross_references:
            if xref[0] == 'GO':
                # Evidence code: text before the first ':' of field 3;
                # ontology letter: first character of field 2.
                evidence = xref[3].split(':')[0]
                aspect = xref[2][0]
                if evidence in EXP_default:
                    category = category_of.get(aspect.upper())
                    if category is not None:
                        found.add(category)
            # Whenever EXP evidence for all three categories has been
            # found, stop scanning this record.
            if len(found) == len(category_of):
                break
        for category in found:
            gene_count[category] += 1
    return gene_count