当前位置: 首页>>代码示例>>Python>>正文


Python SwissProt.parse方法代码示例

本文整理汇总了Python中Bio.SwissProt.parse方法的典型用法代码示例。如果您正苦于以下问题：Python SwissProt.parse方法的具体用法是什么？Python SwissProt.parse怎么用？有哪些Python SwissProt.parse的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.SwissProt的用法示例。


在下文中一共展示了SwissProt.parse方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: features

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def features(files):
    """Print selected feature entries of every record in a Swiss-Prot file.

    ``files`` is the path of a single Swiss-Prot flat file.  For each parsed
    record, every feature whose first element is one of the keys in ``ft``
    is printed as ``<f[0]>,<f[1]>-<f[2]>,<f[3]>``.
    """
    ft=['ZN_FING', 'REGION','METAL','SITE','SIGNAL','REPEAT', 'NP_REGION', 'BINDING','MOTIF','MOD_RES', 'LIPID','DOMAIN','DNA_BIND','DISULFID','CROSSLNK', 'CARBOHYD','CA_BIND', 'ACT_SITE']
    # The context manager guarantees the input file is closed, even when the
    # parser raises part-way through the file.
    with open(files) as handle:
        for record in SwissProt.parse(handle):
            for l in record.features:
                if l[0] in ft:
                    # Single-argument call form prints the same text under
                    # both Python 2 and Python 3.
                    print(l[0]+','+str(l[1])+'-'+str(l[2])+','+l[3])
开发者ID:ElofssonLab,项目名称:enrichetta-thesis,代码行数:9,代码来源:features.py

示例2: go_in_papers

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def go_in_papers(sp_path):
    """Collect, per paper, the GO annotations of the proteins that cite it.

    To be used with SP data, not GOA.

    Records for which ``get_go_evidence_codes`` returns nothing are skipped.

    Returns a tuple ``(papers, papers_prots)``:

    * ``papers``: key: pubmed_id; value: list of go_rec records.  A go_rec
      record is a dictionary with the keys 'sp_id' (swissprot id, taken from
      ``entry_name``), 'go_id' (GO ID) and 'go_ec' (GO Evidence Code).
    * ``papers_prots``: key: pubmed_id; value: dictionary mapping an entry
      name to the number of times the paper was listed for that entry.
    """
    papers = {}
    papers_prots = {}
    # 'with' closes the Swiss-Prot file once parsing is done (or fails).
    with open(sp_path) as sph:
        for sp_rec in SP.parse(sph):
            cur_go_recs = get_go_evidence_codes(sp_rec)
            if not cur_go_recs:
                continue
            for paper in get_papers(sp_rec):
                prot_counts = papers_prots.setdefault(paper, {})
                prot_counts[sp_rec.entry_name] = \
                    prot_counts.get(sp_rec.entry_name, 0) + 1
                for cur_go_rec in cur_go_recs:
                    d1 = dict(sp_id=sp_rec.entry_name,
                              go_id=cur_go_rec[0],
                              go_ec=cur_go_rec[1])
                    papers.setdefault(paper, []).append(d1)
    return papers, papers_prots
开发者ID:FriedbergLab,项目名称:Uniprot-Bias,代码行数:32,代码来源:sp_tools.py

示例3: _parse_features

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
    def _parse_features( self ):
        """Write the features of selected UniProt records as tab-separated text.

        Parses the flat files ``files[11]`` .. ``files[14]`` (relative to
        ``path``) and, for every record whose first taxonomy id is '9606',
        '10090' or '10116', writes one line per feature to ``files[16]``:
        the record's first accession followed by the feature fields, with an
        extra column inserted at index 3 that receives the leading part of
        the feature text when that text can be split.
        """

        def split_text(text):
            # Returns the (column 3, column 4) pair for one feature text.
            # When a probe pattern matches but the extraction pattern does
            # not, the text is left untouched and column 3 stays empty.
            if re.search(r'^[^\.]+\.\s*$', text):
                hit = re.match(r'^(.+)\.\s*$', text)
                return (hit.group(1), '') if hit else ('', text)
            two_part_rules = (
                (r'.+\.\s+\{', r'^(.+)\.\s*\{(.+)\}\.$'),
                (r'.+\.\s+\/', r'^(.+)\.\s*\/(.+)\.$'),
            )
            for probe, extract in two_part_rules:
                if re.search(probe, text):
                    hit = re.match(extract, text)
                    return (hit.group(1), hit.group(2)) if hit else ('', text)
            # No rule applies: just strip braces, dots and slashes.
            return ('', re.sub(r'[\{\}\.\/]', '', text))

        print( 'uniprot flat files, to get features...' )
        wanted_taxa = ['9606', '10090', '10116']
        with open( path + files[16], 'wt' ) as outf:
            for idx in [11,12,13,14]:
                print( files[idx] + '...' )
                with open(path + files[idx], 'rt') as handle:
                    for record in SwissProt.parse(handle):
                        if record.taxonomy_id[0] not in wanted_taxa:
                            continue
                        # pop(0) removes the first accession from the record's list
                        acc = record.accessions.pop(0)
                        for feat in record.features:
                            cols = list(feat)
                            cols.insert(3, '')
                            cols[3], cols[4] = split_text(cols[4])
                            outf.write( acc + "\t" + '\t'.join(map(str, cols)) + '\n')
开发者ID:jd690764,项目名称:pkjnetwork,代码行数:37,代码来源:load_uniprot.py

示例4: get_genes

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
    def get_genes (self,gene_name=""):
        """Write the records whose gene name matches ``gene_name`` to a FASTA file.

        Records are parsed from ``self.fd``; the matching ones are written to
        the file named by ``self.fasta_file`` as ``>entry_name`` followed by
        the sequence.  Nothing is read or written when ``gene_name`` is empty.
        """
        if gene_name == "":
            return

        print("Finding \"{}\" gene in Uniprot database...".format(gene_name))
        upper_name = gene_name.upper() # Rho --> RHO

        # The slice taken from record.gene_name is one character longer than
        # the queried name, so it is compared against the name followed by
        # either terminator:
        #   1) Name=Rho;           --> "RHO;"
        #   2) Name=rho {ECO.....} --> "RHO "
        # A longer name such as "Name=Rhodop;" yields "RHOD" and is rejected.
        # Gene names that do not start with "Name=" are not considered.
        wanted = (upper_name+" ", upper_name+";")

        # 'with' closes the output file even if the parser raises mid-way.
        with open(self.fasta_file, "w") as output_handle:
            for record in SwissProt.parse (self.fd):
                match = record.gene_name[5:5+len(upper_name)+1].upper()
                if match in wanted:
                    print("Add protein to fasta file: " + record.entry_name + ", ...." + record.gene_name)
                    # NOTE(review): if record.sequence is a plain str, .format("fasta")
                    # returns it unchanged (no replacement fields) -- confirm the intent.
                    output_handle.write(">"+record.entry_name+"\n"+record.sequence.format("fasta")+"\n")
开发者ID:ilap,项目名称:MockAssignment,代码行数:30,代码来源:Main.py

示例5: load_uniprot

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
 def load_uniprot(self):
     """Load every record of 'uniprot.txt' into ``self.uniprot``.

     ``self.uniprot`` is reset to None first and stays None when
     ``self.exists('uniprot.txt')`` is false; otherwise it becomes the list
     of records parsed from the handle returned by ``self.open``.
     """
     self.uniprot = None
     if self.exists('uniprot.txt'):
         with self.open('uniprot.txt') as fp:
             self.uniprot = []
             self.uniprot.extend(SwissProt.parse(fp))
开发者ID:admp,项目名称:protein-search,代码行数:10,代码来源:session.py

示例6: _parse_flat_files

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
    def _parse_flat_files( self ):
        """Flatten selected UniProt flat-file records into a tab-separated table.

        Parses the flat files ``files[11]`` .. ``files[14]`` (relative to
        ``path``) and writes one line to ``files[15]`` for every record whose
        first taxonomy id is '9606', '10090' or '10116'.  Columns, in order:
        first accession, entry name, source tag ('tr' when the input file
        name contains 'trembl', otherwise 'sp'), taxonomy id, data class,
        gene name, RecName/SubName full name, AltName full name, AltName
        short name, flags, then the '|'-joined remaining accessions, GeneID,
        RefSeq (trailing '.<digits>' removed), HGNC and MGI ids, followed by
        the database names, ids and third fields of the cross-references
        whose database is listed in ``xdoms``, then ``seqinfo[0]`` and the
        sequence.
        """
        print( 'uniprot flat files...' )
        with open( path + files[15], 'wt' ) as outf:

            for j in [11,12,13,14]:
                print( files[j] + '...' )
                # The source tag depends only on the file name, so compute it once per file.
                srcdb = 'tr' if re.search(r'trembl', files[j]) else 'sp'
                with open(path + files[j], 'rt') as handle:
                    for record in SwissProt.parse(handle):
                        if record.taxonomy_id[0] not in ['9606', '10090', '10116']:
                            continue
                        accs  = record.accessions
                        acc   = accs.pop(0)   # first accession; accs keeps the rest
                        rev   = record.data_class
                        gname = re.sub(r'.*Name=([^;{]+)[{;].*', r'\1', record.gene_name).strip()
                        uid   = record.entry_name
                        taxid = record.taxonomy_id[0]
                        seq   = record.sequence
                        sinfo = str(record.seqinfo[0])
                        desc  = record.description
                        rname = ''
                        fname = ''
                        sname = ''
                        flags = ''
                        # re.IGNORECASE must be passed as flags=...: the fourth
                        # positional argument of re.sub is 'count', so passing the
                        # flag positionally silently disabled case-insensitivity.
                        if 'RecName' in desc:
                            rname = re.sub(r'.*RecName: *Full=([^;{]+)[{;].*', r'\1', desc, flags=re.IGNORECASE).strip()
                        elif 'SubName' in desc:
                            rname = re.sub(r'.*SubName: *Full=([^;{]+) *[;{].*', r'\1', desc, flags=re.IGNORECASE).strip()
                        if 'AltName' in desc:
                            if re.search(r'AltName:[^:]*Full=', desc, re.IGNORECASE):
                                fname = re.sub(r'.*AltName:[^:]*Full=([^;{]+)[{;].*', r'\1', desc, flags=re.IGNORECASE).strip()
                            if re.search(r'AltName:[^:]*Short=', desc, re.IGNORECASE):
                                sname = re.sub(r'.*AltName:[^:]*Short=([^;{]+)[{;].*', r'\1', desc, flags=re.IGNORECASE).strip()
                        if 'Flags:' in desc:
                            flags = re.sub(r'.*Flags: *([^;]+);.*', r'\1', desc, flags=re.IGNORECASE).strip()
                        refs  = list()
                        eids  = list()
                        mgis  = list()
                        hgnc  = list()
                        dids  = list()
                        dnms  = list()
                        ddbs  = list()
                        for xref in record.cross_references:
                            db = xref[0]
                            # Independent checks (not elif): a database listed
                            # in xdoms is recorded in the xdoms columns as well.
                            if db == 'GeneID':
                                eids.append(xref[1])
                            if db == 'RefSeq':
                                refs.append(re.sub(r'\.\d+$', r'', xref[1]))
                            if db == 'MGI':
                                mgis.append(xref[1])
                            if db == 'HGNC':
                                hgnc.append(xref[1])
                            if db in xdoms:
                                dids.append(xref[1])
                                ddbs.append(db)
                                dnms.append(xref[2])
                        outf.write( '\t'.join([ acc, uid, srcdb, taxid, rev, gname, rname, fname, sname, flags, '|'.join(accs), '|'.join(eids), '|'.join(refs), '|'.join(hgnc), '|'.join(mgis), '|'.join(ddbs), '|'.join(dids), '|'.join(dnms), sinfo, seq ]) + '\n' )
开发者ID:jd690764,项目名称:pkjnetwork,代码行数:59,代码来源:load_uniprot.py

示例7: obtain_taxons

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
 def obtain_taxons(self, protein_dict, fh_sprot):
     """Fill ``protein_dict`` with taxonomy ids read from a Swiss-Prot handle.

     For every record parsed from ``fh_sprot``, the first of its accessions
     that is already a key of ``protein_dict`` gets the record's
     ``taxonomy_id`` list as its value; the record's remaining accessions are
     not examined.  ``protein_dict`` is modified in place and also returned.
     """
     for rec in sp.parse(fh_sprot):
         for accession in rec.accessions:
             # Direct membership test on the dict (no .keys() list needed).
             if accession in protein_dict:
                 # assign rec.taxonomy_id list to the protein
                 protein_dict[accession] = rec.taxonomy_id
                 break
     return protein_dict
开发者ID:arkatebi,项目名称:CAFA-Toolset,代码行数:14,代码来源:Get_taxons.py

示例8: __init__

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
 def __init__(self, sprot_cache='', trembl_cache='', organism='homo sapien'):
     """Set up the record index, optionally pre-loaded from a Swiss-Prot file.

     ``self.records`` maps every accession of every record parsed from
     ``sprot_cache`` to that record; it stays empty when ``sprot_cache`` is
     empty or the file cannot be read (the error is printed, not raised).
     ``organism`` is stored stripped and lower-cased.  ``trembl_cache`` is
     accepted but not read by this method.
     """
     self.records = {}
     self.organism = organism.strip().lower()
     if sprot_cache:
         # Load the swissprot records if file can be found
         try:
             with open(sprot_cache) as fp:
                 for record in SwissProt.parse(fp):
                     for accession in record.accessions:
                         self.records[accession] = record
         except IOError as e:
             # Best effort: a missing or unreadable cache is reported and ignored.
             print(e)
             print("SwissProt cache not loaded")
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:14,代码来源:uniprot.py

示例9: __build_NEXP_accession_singleSpecies

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def __build_NEXP_accession_singleSpecies(fh_sprot, taxon_id, ontType, EXP_default=set([])):
    '''
    Collect the accession lists of the proteins in a UniProtKB/SwissProt
    file (file pointer fh_sprot) that, for the ontology type ontType, have
    at least one GO annotation but none whose evidence code is in
    EXP_default.

    Only records whose taxonomy ids contain taxon_id are considered. The
    returned list holds one entry per selected record, each entry being
    that record's accessions list.
    '''
    selected = []
    print('      Building the accession list with the proteins ' + \
          'that have only non-EXP evidence codes at time t1 ...')
    for record in sp.parse(fh_sprot):
        # Skip records of other taxa.
        if taxon_id not in record.taxonomy_id:
            continue
        annotated_in_ont = False   # any GO entry in ontType seen?
        has_exp_evidence = False   # any such entry backed by an EXP code?
        for xref in record.cross_references:
            # Only the cross references to the GO DB matter.
            if xref[0] != 'GO':
                continue
            go_id, evidence, aspect = (xref[1],
                                       xref[3].split(':')[0],
                                       xref[2][0])
            if aspect != ontType:
                continue
            annotated_in_ont = True
            if evidence in EXP_default:
                # One EXP evidence is enough to rule the protein out.
                has_exp_evidence = True
                break
        if annotated_in_ont and not has_exp_evidence:
            selected.append(record.accessions)
    return selected
开发者ID:arkatebi,项目名称:SwissProt-stats,代码行数:50,代码来源:xTract_sp_evalSet.py

示例10: obtain_goterms

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
 def obtain_goterms(self, goterm_dict, fh_sprot):
     """Add the GO annotations found in a Swiss-Prot handle to ``goterm_dict``.

     For every record parsed from ``fh_sprot``, the first of its accessions
     that is a key of ``goterm_dict`` receives one tuple per 'GO'
     cross-reference: (second field, the part of the fourth field before
     ':', first character of the third field).  The tuples are added with
     ``.add``, so the dictionary values are expected to be sets.
     ``goterm_dict`` is modified in place and also returned.
     """
     for rec in sp.parse(fh_sprot):
         for accession in rec.accessions:
             if accession in goterm_dict:
                 for crossRef in rec.cross_references:
                     if crossRef[0] == 'GO':
                         goDef = (crossRef[1], (crossRef[3].split(':'))[0],
                                  crossRef[2][0])
                         goterm_dict[accession].add(goDef)
                 # Only the first matching accession of a record is updated.
                 break
     return goterm_dict
开发者ID:arkatebi,项目名称:CAFA-Toolset,代码行数:18,代码来源:Get_goterms.py

示例11: UNIPROT_GENE_PLUS

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def UNIPROT_GENE_PLUS(UNIPROT):
    """Return the gene names of a UniProt entry downloaded from uniprot.org.

    LIST - The difference between this and UNIPROT_GENE is that
    UNIPROT_GENE_PLUS returns synonym genes as well, if any, in addition to
    the gene name in the first entry.

    For each record, the value after '=' in the first ';'-separated field of
    ``gene_name`` is appended.  When ``gene_name`` splits into more than two
    fields, the comma-separated values after '=' in the second field are
    appended too, with all whitespace removed.
    """
    import contextlib
    try:
        from urllib2 import urlopen          # Python 2
    except ImportError:
        from urllib.request import urlopen   # Python 3
    from Bio import SwissProt

    GENES=[]
    # contextlib.closing releases the HTTP connection on both Python 2 and 3,
    # including when parsing fails.
    with contextlib.closing(urlopen("http://www.uniprot.org/uniprot/%s.txt"%UNIPROT)) as url:
        for record in SwissProt.parse(url):
            fields = record.gene_name.split(";")
            GENES.append(fields[0].split("=")[1])
            if len(fields) > 2:
                for syno in fields[1].split("=")[1].split(","):
                    GENES.append("".join(syno.split()))
    return GENES
开发者ID:LOBUTO,项目名称:CANCER.GENOMICS,代码行数:18,代码来源:66_HUBS_90_ANALYSIS.py

示例12: count_genes_with_EXP_old

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def count_genes_with_EXP_old(fh_sprot, taxon_id, EXP_default=set([])):
    """Count the genes of one taxon that carry EXP evidence, per ontology.

    Only records of the sprot file whose taxonomy ids contain ``taxon_id``
    (such as 559292 for yeast) are considered.  A record counts for BPO,
    CCO or MFO when one of its GO cross references has an evidence code in
    ``EXP_default`` and an ontology letter of 'P', 'C' or 'F' respectively
    (case-insensitive).

    Returns the tuple ``(exp_bpo_ct, exp_cco_ct, exp_mfo_ct)``.
    """
    totals = {'P': 0, 'C': 0, 'F': 0}

    for record in sp.parse(fh_sprot):
        if taxon_id not in record.taxonomy_id:
            continue
        # Ontology letters for which this record showed EXP evidence.
        seen = set()
        for xref in record.cross_references:
            # Only cross references to the GO DB carry GO information.
            if xref[0] == 'GO':
                go_id, evidence, aspect = (xref[1],
                                           xref[3].split(':')[0],
                                           xref[2][0])
                if evidence in EXP_default:
                    letter = aspect.upper()
                    if letter in totals:
                        seen.add(letter)
            # Nothing more to learn once all three categories are flagged.
            if len(seen) == 3:
                break
        for letter in seen:
            totals[letter] += 1
    return (totals['P'], totals['C'], totals['F'])
开发者ID:arkatebi,项目名称:SwissProt-stats,代码行数:49,代码来源:Count_sp_genes.py

示例13: count_GOterms_with_EXP

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def count_GOterms_with_EXP(fh_sprot, taxon_id, EXP_default=set([]), max_records=21):
    '''
    Extract, for each gene of one taxon, the distinct GO terms that are
    validated by any of the experimental evidence codes in EXP_default.

    Only records whose taxonomy ids contain taxon_id (such as 559292 for
    yeast) are considered. For each of them a set of GO ids is built per
    ontological category and stored under the record's first accession in
    the dictionary of that category.

    max_records limits how many matching records are processed. The
    default of 21 reproduces the cut-off that used to be hard-coded; pass
    None to process the whole file. A value of 0 or less processes nothing.

    Returns the THREE dictionaries (mfo_terms, bpo_terms, cco_terms).
    '''
    mfo_terms = OrderedDict()
    bpo_terms = OrderedDict()
    cco_terms = OrderedDict()
    if max_records is not None and max_records <= 0:
        return (mfo_terms, bpo_terms, cco_terms)
    count = 0
    for rec in sp.parse(fh_sprot):
        # SELECT records that are related to a specific
        # taxon_id such as 559292 for yeast:
        if taxon_id not in rec.taxonomy_id:
            continue
        protName = rec.accessions[0]
        # One set of GO ids per ontology letter (F, P, C):
        terms = {'F': set(), 'P': set(), 'C': set()}
        # Go over the list of DB cross references:
        for crossRef in rec.cross_references:
            # Consider the cross_reference entries that
            # relate to GO DB:
            if crossRef[0] != 'GO':
                continue
            go_id = crossRef[1]
            evidence = (crossRef[3].split(':'))[0]
            letter = crossRef[2][0].upper()
            if evidence in EXP_default and letter in terms:
                terms[letter].add(go_id)
        # Store the term sets of this protein in the three dictionaries:
        mfo_terms[protName] = terms['F']
        bpo_terms[protName] = terms['P']
        cco_terms[protName] = terms['C']
        count += 1
        # Stop right after the last allowed record so that no further
        # records are read from fh_sprot.
        if max_records is not None and count >= max_records:
            break
    return (mfo_terms, bpo_terms, cco_terms)
开发者ID:arkatebi,项目名称:SwissProt-stats,代码行数:49,代码来源:Count_sprot_GOterms.py

示例14: check_sprot_format

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def check_sprot_format(fh_sprot):
    """
    Check whether the file (with file handle fh_sprot) can be read as
    UniProtKB/Swissprot format.

    Returns True when fetching the first record from the parser raises no
    exception (this includes input that yields no record at all), and
    False when it raises.
    """
    iter_handle = sp.parse(fh_sprot) # sp.parse method returns a generator
    try:
        # Pull at most one record; that is enough to trigger a parse error.
        next(iter(iter_handle), None)
    except Exception:
        # Not a bare 'except:', so KeyboardInterrupt and SystemExit still
        # propagate instead of being reported as a format problem.
        return False
    return True
开发者ID:arkatebi,项目名称:CAFA-Toolset,代码行数:19,代码来源:FormatChecker.py

示例15: count_genes_with_EXP

# 需要导入模块: from Bio import SwissProt [as 别名]
# 或者: from Bio.SwissProt import parse [as 别名]
def count_genes_with_EXP(fh_sprot, taxon_id, EXP_default=set([])):
    """Count the genes of one taxon that carry EXP evidence, per ontology.

    Only records whose taxonomy ids contain ``taxon_id`` (such as 559292
    for yeast) are considered.  A record counts for 'MFO', 'BPO' or 'CCO'
    when one of its GO cross references has an evidence code in
    ``EXP_default`` and an ontology letter of 'F', 'P' or 'C' respectively
    (case-insensitive).

    Returns a dictionary with the keys 'MFO', 'BPO' and 'CCO'.
    """
    letter_to_ontology = {'F': 'MFO', 'P': 'BPO', 'C': 'CCO'}
    gene_count = {'MFO': 0, 'BPO': 0, 'CCO': 0}

    for record in sp.parse(fh_sprot):
        if taxon_id not in record.taxonomy_id:
            continue
        # Ontologies for which this record showed an EXP evidence.
        flagged = set()
        for xref in record.cross_references:
            # Only the cross references to the GO DB are of interest.
            if xref[0] == 'GO':
                go_id, evidence, aspect = (xref[1],
                                           xref[3].split(':')[0],
                                           xref[2][0])
                if evidence in EXP_default:
                    ontology = letter_to_ontology.get(aspect.upper())
                    if ontology is not None:
                        flagged.add(ontology)
            # All three categories found: no need to look further.
            if len(flagged) == 3:
                break
        for ontology in flagged:
            gene_count[ontology] += 1
    return gene_count
开发者ID:arkatebi,项目名称:SwissProt-stats,代码行数:47,代码来源:Count_sp_genes.py


注：本文中的Bio.SwissProt.parse方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。