当前位置: 首页>>代码示例>>Python>>正文


Python Entrez.esearch方法代码示例

本文整理汇总了Python中Bio.Entrez.esearch方法的典型用法代码示例。如果您正苦于以下问题:Python Entrez.esearch方法的具体用法?Python Entrez.esearch怎么用?Python Entrez.esearch使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.Entrez的用法示例。


在下文中一共展示了Entrez.esearch方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_taxid_from_species

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_taxid_from_species(self, species):
        """Resolve a species name to an NCBI taxonomy identifier.

        :param species: str, either an all-digit taxid or a species name to
            look up in the NCBI "taxonomy" database.
        :return: the input unchanged when it is all digits; otherwise the
            first ID reported by Entrez.esearch, or None when the search
            returns no ID. If the request fails with an IOError, the error is
            logged and the species value as originally passed is returned.
        """
        # An all-digit value is returned as-is; it is also the fallback
        # result when the lookup below fails with an IOError.
        taxid = species
        if not species.isdigit():
            Entrez.email = EMBL.PREVIOUS_VALUES["email"]
            # Fetch the taxid from the NCBI taxonomy database.
            logging.debug("Fetch the taxid from species name using Entrez.esearch")
            # Strip BEFORE substituting: once surrounding blanks have been
            # turned into "+" they can no longer be removed by strip().
            species = species.strip().replace(" ", "+")
            try:
                search = Entrez.esearch(term=species, db="taxonomy", retmode="xml")
                try:
                    record = Entrez.read(search)
                finally:
                    # Release the network handle even if parsing fails.
                    search.close()
                if not record['IdList']:  # no taxid found
                    logging.warning("Please verify the species name. '%s' species is unknown into the NCBI taxonomy databse. Impossible to check the taxonomic classification. We will use the default value 'Life' to populate the OC line.",self.species)
                    taxid = None
                else:
                    taxid = record['IdList'][0]
            except IOError as e:
                logging.error("Could not get taxid from species: %s", e)

        return taxid
开发者ID:NBISweden,项目名称:EMBLmyGFF3,代码行数:22,代码来源:EMBLmyGFF3.py

示例2: get_GIs

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_GIs(self, accessions, n_entrez=2500, **kwargs):
        '''
        Use entrez esearch to get genbank identifiers from accession numbers.

        :param accessions: sequence of accession strings; it is sliced into
            portions of at most ``n_entrez`` entries, one esearch per portion.
        :param n_entrez: number of accessions sent per esearch request.
        :param kwargs: accepted for call compatibility; not used here.
        :return: list with the 'IdList' entries of every response, in
            request order. Empty when ``accessions`` is empty (no request
            is made in that case).
        '''
        retmax = 10**5  # max records to retrieve at once; 10^5 is documented limit, but >2500 reproducibly throws errors

        # Split the accession list into n_entrez-long portions and turn each
        # portion into a ' ' separated query string. `range` is used instead
        # of `xrange` so the code runs on both Python 2 and Python 3.
        queries = [
            " ".join(accessions[i:i + n_entrez])
            for i in range(0, len(accessions), n_entrez)
        ]

        # Sanity check: every accession ended up in exactly one query.
        assert sum(len(q.split()) for q in queries) == len(accessions)

        giList = []
        for q in queries:
            handle = Entrez.esearch(db=self.gbdb, term=q, retmax=retmax)  # retrieve xml of search results
            try:
                giList += Entrez.read(handle)['IdList']  # pull GI numbers from handle
            finally:
                # Do not leak the connection if parsing fails.
                handle.close()
        return giList
开发者ID:nextstrain,项目名称:fauna,代码行数:19,代码来源:parse.py

示例3: get_asm_uids

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_asm_uids(args, taxon_uid):
    """Collect the NCBI assembly UIDs found for the passed taxon.

    :param args:  Namespace, command-line arguments
    :param taxon_uid:  str, NCBI taxon ID

    The ESearch term is ``txid<taxon_uid>[Organism:exp]``, run against the
    "assembly" database with NCBI history enabled; the UIDs are then
    recovered from that history in batches of 250 and returned.
    """
    log = logging.getLogger(__name__)

    search_term = f"txid{taxon_uid}[Organism:exp]"
    log.info("Entrez ESearch with query: %s", search_term)

    # Initial search for assembly UIDs, keeping the result set on NCBI's
    # history server so it can be retrieved batch-wise afterwards.
    esearch_options = {
        "db": "assembly",
        "term": search_term,
        "format": "xml",
        "usehistory": "y",
    }
    search_handle = entrez_retry(args, log, Entrez.esearch, **esearch_options)
    search_record = Entrez.read(search_handle, validate=False)
    n_hits = int(search_record["Count"])
    log.info("Entrez ESearch returns %d assembly IDs", n_hits)

    # Pull the assembly UIDs back out of the web history.
    assembly_uids = entrez_batch_webhistory(
        args, log, search_record, n_hits, 250, db="assembly", retmode="xml"
    )
    log.info("Identified %d unique assemblies", len(assembly_uids))
    return assembly_uids


# Extract filestem from Entrez eSummary 
开发者ID:widdowquinn,项目名称:pyani,代码行数:40,代码来源:genbank_get_genomes_by_taxon.py

示例4: get_tax_id

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_tax_id(self, query_name):
        """Look up the NCBI taxonomy ID for a species name.

        To get data from NCBI taxonomy we need the taxid; passing the
        species name to esearch returns it.

        :param query_name: str, species name to search for in the
            "taxonomy" database.
        :return: the first entry of the response's 'IdList', or None when
            the list is empty.
        """
        # Strip BEFORE substituting: once surrounding blanks have been turned
        # into "+" they can no longer be removed by strip().
        query_name = query_name.strip().replace(' ', "+")
        Entrez.email = 'A.N.Other@example.com'
        search = Entrez.esearch(term=query_name, db="taxonomy", retmode="xml")
        try:
            record = Entrez.read(search)
        finally:
            # Release the network handle even if parsing fails.
            search.close()
        id_list = record['IdList']
        return id_list[0] if id_list else None
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:11,代码来源:handleGB.py

示例5: searchEntrez

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def searchEntrez(accession_list,bio_type):
    """Search an Entrez database for accessions, 20 at a time.

    :param accession_list: list of accession strings; each batch of up to
        20 is joined with ',' and sent as one esearch term.
    :param bio_type: name of the Entrez database to search (passed as `db`).
    :return: list with the "IdList" entries of every response, in request
        order.

    Prints the elapsed wall-clock time (whole seconds) before returning.
    """
    start_time = time.time()
    Entrez.email = "nsalomonis@gmail.com" # Always tell NCBI who you are
    batch_size = 20
    gi_list = []
    # Slicing never raises IndexError, so stepping through the list with
    # range() visits exactly the non-empty batches the old while-loop did.
    for index in range(0, len(accession_list), batch_size):
        batch = accession_list[index:index + batch_size]
        # str.join replaces the Python 2-only string.join().
        search_handle = Entrez.esearch(db=bio_type, term=','.join(batch))
        try:
            search_results = Entrez.read(search_handle)
        finally:
            # Do not leak the connection if parsing fails.
            search_handle.close()
        gi_list += search_results["IdList"]
    time_diff = int(time.time() - start_time)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("finished in %s seconds" % time_diff)
    return gi_list
开发者ID:nsalomonis,项目名称:altanalyze,代码行数:17,代码来源:IdentifyAltIsoforms.py

示例6: test

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def test(self):
        '''
        Check that the Entrez API is connecting and working.

        Searches the "gene" database for the symbol APOBEC3G and returns 1
        when Entrez ID 60489 is among the returned IDs, otherwise 0.
        '''
        query_handle = Entrez.esearch(
            db="gene",
            term="(APOBEC3G[Preferred+Symbol])",
            retmode="text",
            retmax=1000000,
        )
        parsed = Entrez.read(query_handle)
        query_handle.close()
        return 1 if "60489" in parsed['IdList'] else 0
开发者ID:CGATOxford,项目名称:CGATPipelines,代码行数:16,代码来源:PipelineGeneInfo.py

示例7: download_all

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def download_all(self, host, count=1000000000):
        '''
        Fetch the Entrez Gene IDs recorded for one host and return them as
        the 'IdList' of the search result.

        host is interpolated into the query as the Taxonomy ID; count is
        passed to esearch as retmax (upper bound on returned IDs).
        '''
        # Only IDs flagged "alive" (current, not obsolete) are requested.
        query = '("alive"[Properties]) AND %s[Taxonomy ID]' % host

        handle = Entrez.esearch(
            db="gene",
            term=query,
            retmode="text",
            retmax=count,
        )
        parsed = Entrez.read(handle)
        handle.close()

        gene_ids = parsed['IdList']
        return gene_ids
开发者ID:CGATOxford,项目名称:CGATPipelines,代码行数:16,代码来源:PipelineGeneInfo.py

示例8: get_gene_sequence

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_gene_sequence(gene_name):
    """Read the stored sequence text for *gene_name* from disk.

    The file ``../../gene_sequences/<gene_name>_sequence.txt`` (relative to
    the current working directory) is read in binary mode and every CRLF
    line ending is removed.

    :param gene_name: gene identifier used to build the file name.
    :return: the file content as ``str`` with CRLF pairs removed.
    :raises Exception: when the sequence file cannot be opened or read.
    """
    gene_file = '../../gene_sequences/%s_sequence.txt' % gene_name
    try:
        with open(gene_file, 'rb') as f:
            raw = f.read()
    except (IOError, OSError):
        # Only I/O failures mean "file missing/unreadable"; other errors
        # (and KeyboardInterrupt) are no longer mislabelled as such.
        raise Exception("could not find gene sequence file %s, please see examples and generate one for your gene as needed, with this filename" % gene_file)

    # Binary-mode data is bytes on Python 3, so the replacement must use
    # bytes literals (these are plain str on Python 2).
    raw = raw.replace(b'\r\n', b'')
    if isinstance(raw, str):
        # Python 2: bytes is str, nothing to decode.
        seq = raw
    else:
        # Python 3: hand callers text, as the Python 2 code path did.
        seq = raw.decode('utf-8')

    return seq

    # gene_positions = {'CCDC101': [28553928,28591790]}
    # search = Entrez.esearch(db="gene", term='%s[Gene Name] AND Homo Sapiens[Organism]' % (gene_name))
    # records = Entrez.read(search)

    # if len(records['IdList']) > 1:
    #     print "warning, multiple hits found for entrez gene search %s" % gene_name

    # elink = Entrez.read(Entrez.elink(dbfrom="gene", db='nucleotide', id=records['IdList'][0]))
    # nucl_id = elink[0]['LinkSetDb'][3]

    # cut = False
    # if nucl_id['LinkName'] != 'gene_nuccore_refseqgene':
    #     if gene_name in gene_positions.keys():
    #         nucl_id = elink[0]['LinkSetDb'][0]['Link'][0]['Id']
    #         cut = True
    #     else:
    #         print "sorry not enough information to return sequence"
    #         return None
    # else:
    #     nucl_id = nucl_id['Link'][0]['Id']

    # handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
    # record = SeqIO.read(handle, "genbank")
    # handle.close()

    # if cut:
    #     start, end = gene_positions[gene_name]
    #     return str(record.seq)[start:end]
    # else:
    #     return str(record.seq) 
开发者ID:MicrosoftResearch,项目名称:Azimuth,代码行数:45,代码来源:util.py

示例9: search

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def search(self):
        """Run an Entrez search and emit the leading records via signals.

        Emits "searching", "fetching" and "finished" on searchSig as the
        work progresses. The search uses NCBI history; records are then
        fetched in batches of 20, up to min(self.display_items, self.count),
        and each record is emitted on updateSig as a list of strings
        ("N/A" for absent fields) together with a percentage on
        progressBarSig. Returns early, without emitting "finished", when
        self.interrupt is set. Any exception is reported by emitting
        "except" on searchSig and the formatted traceback on
        exception_signal.
        """
        # Record fields emitted per row, in column order.
        fields = (
            "GBSeq_accession-version", "GBSeq_definition", "GBSeq_organism",
            "GBSeq_length", "GBSeq_update-date", "GBSeq_taxonomy",
            "GBSeq_create-date", "GBSeq_moltype", "GBSeq_topology",
            "GBSeq_references", "GBSeq_source", "GBSeq_keywords",
            "GBSeq_project", "GBSeq_other-seqids", "GBSeq_strandedness",
            "GBSeq_comment",
        )
        try:
            self.searchSig.emit("searching")
            self.init_list()
            self.ctrl_text()  # restore the text
            self.NCBI_model.list_checked = []
            self.database = self.comboBox_2.currentText()
            keywords = self.lineEdit.text()
            # Fall back to a placeholder address when the field is empty.
            Entrez.email = self.lineEdit_2.text() or "A.N.Other@example.com"

            esearch_handle = Entrez.esearch(
                db=self.database, term=keywords, usehistory="y"
            )
            summary = Entrez.read(esearch_handle)
            self.webenv = summary["WebEnv"]
            self.query_key = summary["QueryKey"]
            self.count = int(summary["Count"])
            # If there are only 2 sequences, self.display_items also becomes 2.
            self.ctrlItemsSig.emit(self.count)
            esearch_handle.close()

            self.searchSig.emit("fetching")
            chunk = 20
            # Never try to show more records than the search found.
            limit = min(self.display_items, self.count)
            for offset in range(0, limit, chunk):
                if self.interrupt:
                    return
                stop = min(limit, offset + chunk)
                print("Going to download record %i to %i" % (offset + 1, stop))
                # stop - offset is the batch size, shortened for the last
                # (possibly partial) batch.
                fetch_handle = Entrez.efetch(
                    db=self.database,
                    retmode="xml",
                    retstart=offset,
                    retmax=stop - offset,
                    webenv=self.webenv,
                    query_key=self.query_key,
                )
                records = Entrez.read(fetch_handle)
                for position, entry in enumerate(records, offset + 1):
                    row = [
                        str(entry[key]) if key in entry else "N/A"
                        for key in fields
                    ]
                    self.updateSig.emit(row)
                    self.progressBarSig.emit(position * 100 / limit)
                fetch_handle.close()
            self.searchSig.emit("finished")
        except:
            # Worker boundary: report every failure to the GUI via signals.
            self.searchSig.emit("except")
            failure_text = ''.join(traceback.format_exception(*sys.exc_info()))
            self.exception_signal.emit(failure_text)
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:63,代码来源:Lg_SerhNCBI.py


注:本文中的Bio.Entrez.esearch方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。