当前位置: 首页>>代码示例>>Python>>正文


Python Entrez.efetch方法代码示例

本文整理汇总了Python中Bio.Entrez.efetch方法的典型用法代码示例。如果您正苦于以下问题:Python Entrez.efetch方法的具体用法?Python Entrez.efetch怎么用?Python Entrez.efetch使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.Entrez的用法示例。


在下文中一共展示了Entrez.efetch方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_ncbi_seq

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_ncbi_seq(email, db, rettype, accession):
    """Fetch one record from NCBI Entrez and return its content.

    Args:
        email: Address assigned to ``Entrez.email`` before the request.
        db: Database name forwarded to ``Entrez.efetch``.
        rettype: Return type forwarded to ``Entrez.efetch``.
        accession: Identifier of the record to fetch.

    Returns:
        Whatever ``handle.read()`` yields for the response, or ``None`` when
        anything goes wrong (an error line is written to stderr instead).
    """
    print("Fetching accession %s from GenBank\n" % (accession))

    Entrez.email = email

    try:
        handle = Entrez.efetch(
            db=db,
            rettype=rettype,
            retmode="text",
            id=accession,
        )
        try:
            return handle.read()
        finally:
            # Release the connection even when read() fails.
            handle.close()
    except Exception:
        # Deliberately best-effort: report the failure and return None.
        sys.stderr.write("Error! Cannot fetch: %s        \n" % accession)
        return None
开发者ID:phageParser,项目名称:phageParser,代码行数:24,代码来源:add_organism.py

示例2: fetch_names

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fetch_names(id_list):
    """Map GenBank record names to organism names for the given IDs.

    Records are requested in batches of 100 so that a single request to
    NCBI does not get too big.

    Args:
        id_list: Sequence of identifiers; slices of it are passed as ``id``
            to ``Entrez.efetch``.

    Returns:
        dict mapping ``record.name`` to ``record.annotations['organism']``
        for every record parsed from the responses. Empty input yields ``{}``.
    """
    organism_names = {}
    batch_size = 100
    total = len(id_list)

    for i in range(0, total, batch_size):
        j = min(i + batch_size, total)

        sys.stderr.write(
            "Fetching entries from %s to %s from GenBank\n" % (i, j))
        sys.stderr.flush()
        # NOTE(review): `db` is not defined inside this function; presumably
        # a module-level setting -- confirm against the rest of the file.
        result_handle = Entrez.efetch(db=db, rettype="gb", id=id_list[i:j])
        try:
            # Using NCBI name, which should match accession number passed
            for record in parse(result_handle, 'genbank'):
                organism_names[record.name] = record.annotations['organism']
        finally:
            # One handle per batch: close it before starting the next one.
            result_handle.close()

    return organism_names
开发者ID:phageParser,项目名称:phageParser,代码行数:22,代码来源:organism_name_update.py

示例3: get_species_from_taxid

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_species_from_taxid(self, taxid):
    """Return a capitalised species name for a taxid or a species string.

    When ``taxid`` consists only of digits it is looked up in the Entrez
    ``taxonomy`` database and replaced by the record's ``ScientificName``.
    If the lookup fails, the error is logged and the original value is kept.

    Args:
        taxid: str, either a numeric taxonomy ID or a species name.

    Returns:
        The value with its first character upper-cased and the rest
        lower-cased; an empty input is returned unchanged.
    """
    species = taxid
    if taxid.isdigit():
        Entrez.email = EMBL.PREVIOUS_VALUES["email"]
        # Fetch the classification using the taxid.
        logging.debug("Fetch The Lineage using Entrez.efetch")
        try:
            search = Entrez.efetch(id=taxid, db="taxonomy", retmode="xml")
            try:
                data = Entrez.read(search)
            finally:
                search.close()
            species = data[0]['ScientificName']
        except (IOError, IndexError, KeyError) as e:
            # IndexError/KeyError: the lookup returned no usable record.
            # Fall back to the raw taxid, exactly as for a network failure.
            logging.error("Could not get species from taxid: %s", e)

    if not species:
        # Nothing to capitalise; species[0] would raise IndexError.
        return species
    return "%s%s" % (species[0].upper(), species[1:].lower())

    # If species is a taxid, replace it with the species name
开发者ID:NBISweden,项目名称:EMBLmyGFF3,代码行数:19,代码来源:EMBLmyGFF3.py

示例4: populate_organism

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def populate_organism():
    """Create an ``Organism`` row for each accession in the accession file.

    Reads ``bac_accession_list.txt`` from ``DATA_DIR`` through
    ``read_accession_file``, fetches the matching GenBank records from the
    ``nuccore`` database in batches of 200, and calls ``get_or_create`` with
    each record's name and organism annotation.
    """
    def add_organism(name, accession):
        # get the object, this also checks for duplicates
        o, _ = Organism.objects.get_or_create(
            name=name, accession=accession)
        return o

    def merge_acc_names(accession_list):
        # Build {record.name: organism} from GenBank, 200 accessions per call.
        acc_name_dict = {}
        db = "nuccore"
        # Doing batches of 200 to make sure requests to NCBI are not too big
        for i in range(0, len(accession_list), 200):
            result_handle = Entrez.efetch(
                db=db, rettype="gb", id=accession_list[i:i + 200])
            try:
                records = SeqIO.parse(result_handle, 'genbank')
                for record in tqdm(records):
                    # Using NCBI name, which should match accession number
                    # passed
                    acc_name_dict[record.name] = record.annotations['organism']
            finally:
                result_handle.close()
        return acc_name_dict

    with open(os.path.join(DATA_DIR, 'bac_accession_list.txt')) as f:
        accession_list = list(read_accession_file(f))

    # The file only provides accessions, so the names have to come from
    # GenBank. Indexing the plain list by accession (as the previous code
    # did) raises TypeError for any non-empty file.
    # NOTE(review): assumes read_accession_file yields accession strings, as
    # its use as an efetch id list in populate_anticrispr suggests -- confirm.
    acc_name_dict = merge_acc_names(accession_list)
    for acc, name in acc_name_dict.items():
        add_organism(name=name, accession=acc)
开发者ID:phageParser,项目名称:phageParser,代码行数:32,代码来源:populate.py

示例5: populate_anticrispr

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def populate_anticrispr():
    """Create an ``AntiCRISPR`` row for each accession in the accession file.

    Reads ``antiCRISPR_accessions.txt`` from ``DATA_DIR`` through
    ``read_accession_file``, fetches all entries from the ``protein``
    database as FASTA in one request, and stores each record's name and
    sequence with ``get_or_create``.
    """
    with open(os.path.join(DATA_DIR, 'antiCRISPR_accessions.txt')) as f:
        accession_list = list(read_accession_file(f))
    print("Fetching AntiCRISPR entries")
    result_handle = Entrez.efetch(
        db='protein', rettype="fasta", id=accession_list)
    try:
        for record in tqdm(SeqIO.parse(result_handle, 'fasta')):
            anticrispr, _ = AntiCRISPR.objects.get_or_create(
                accession=record.name,
                sequence=str(record.seq))
            anticrispr.save()
    finally:
        # Close the response even if parsing or a database write fails.
        result_handle.close()
开发者ID:phageParser,项目名称:phageParser,代码行数:13,代码来源:populate.py

示例6: get_tax_names

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_tax_names(taxa):
    """Get tax names from ids or string.

    Args:
        taxa: Either the path of an existing file whose lines hold
            comma-separated entries, or a comma-separated string.

    Returns:
        set of names. All-digit entries are looked up in the Entrez
        ``taxonomy`` database and replaced by their ``ScientificName``
        (and dropped when the lookup returns none); every other entry is
        kept verbatim.
    """
    logging.debug('Checking tax inputs')

    def splitter(s):
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        return re.split(r'\s*,\s*', s)

    tax_ids = []
    if os.path.isfile(taxa):
        with open(taxa) as fh:
            for line in fh:
                tax_ids.extend(splitter(line.rstrip()))
    else:
        tax_ids = splitter(taxa)

    tax_names = set()
    for tax in tax_ids:
        logging.debug('Tax {}'.format(tax))

        if not tax.isdigit():
            tax_names.add(tax)
            continue

        handle = Entrez.efetch(db='taxonomy', id=tax)
        try:
            results = Entrez.read(handle)
        finally:
            handle.close()
        if results:
            name = results[0].get('ScientificName')
            if name:
                tax_names.add(name)

    return tax_names


# -------------------------------------------------- 
开发者ID:kyclark,项目名称:bioinformatics_primer,代码行数:35,代码来源:solution.py

示例7: fetch_from_entrez

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fetch_from_entrez(index, cache_dir=False):
    """Return the Medline record for a PubMed entry, using a cache if given.

    Args:
        index: PubMed identifier; it is slugified for the cache key and
            passed to Entrez as ``str(index)``.
        cache_dir: Cache location handed to ``fetch_from_cache`` /
            ``save_to_cache``; presumably a list of path components, since
            it is joined with ``'/'`` (confirm against callers). A falsy
            value (the default) disables caching.

    Returns:
        The cached entry when one exists, otherwise the result of
        ``Medline.read`` on the Entrez response, or ``None`` when all five
        attempts fail.
    """
    logger = logging.getLogger('build')

    # slugify the index for the cache filename (some indices have symbols
    # not allowed in file names (e.g. /))
    index_slug = slugify(index)

    # Only touch the cache when one was requested: joining the default
    # ``False`` would raise TypeError.
    cache_file_path = None
    if cache_dir:
        cache_file_path = '{}/{}'.format('/'.join(cache_dir), index_slug)
        d = fetch_from_cache(cache_dir, index_slug)
        if d:
            logger.info('Fetched {} from cache'.format(cache_file_path))
            return d

    # if nothing is found in the cache, use the web API
    logger.info('Fetching {} from Entrez'.format(index))
    max_tries = 5
    for tries in range(max_tries):
        if tries > 0:
            logger.warning('Failed fetching pubmed {}, retrying'.format(str(index)))

        try:
            Entrez.email = 'info@gpcrdb.org'
            handle = Entrez.efetch(
                db="pubmed",
                id=str(index),
                rettype="medline",
                retmode="text"
            )
        except Exception:
            # Retry on any request failure, but let KeyboardInterrupt and
            # SystemExit propagate.
            time.sleep(2)
            continue

        try:
            d = Medline.read(handle)
        finally:
            handle.close()

        if cache_dir:
            # save to cache
            save_to_cache(cache_dir, index_slug, d)
            logger.info('Saved entry for {} in cache'.format(cache_file_path))
        return d

    # Every attempt failed.
    return None
开发者ID:protwis,项目名称:protwis,代码行数:42,代码来源:tools.py

示例8: entrez_batch_webhistory

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def entrez_batch_webhistory(args, record, expected, batchsize, *fnargs, **fnkwargs):
    """Retrieve the results of an earlier NCBI webhistory search in batches.

    :param args:  Namespace, command-line arguments
    :param record:  Entrez webhistory record
    :param expected:  int, number of expected search returns
    :param batchsize:  int, number of search returns to retrieve in each batch
    :param *fnargs:  tuple, arguments to Efetch
    :param **fnkwargs:  dict, keyword arguments to Efetch

    Each batch is requested with Efetch through entrez_retry and parsed
    without validation; the parsed batches are concatenated into one list,
    which is returned.
    """
    log = logging.getLogger(__name__)

    collected = []
    for offset in range(0, expected, batchsize):
        handle = entrez_retry(
            args,
            log,
            Entrez.efetch,
            *fnargs,
            retstart=offset,
            retmax=batchsize,
            webenv=record["WebEnv"],
            query_key=record["QueryKey"],
            **fnkwargs
        )
        collected.extend(Entrez.read(handle, validate=False))
    return collected


# Get assembly UIDs for the root taxon 
开发者ID:widdowquinn,项目名称:pyani,代码行数:36,代码来源:genbank_get_genomes_by_taxon.py

示例9: get_tax_data

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_tax_data(self, taxid):
    """Fetch and parse the Entrez taxonomy record for ``taxid``.

    Args:
        taxid: Identifier passed as ``id`` to ``Entrez.efetch``.

    Returns:
        The structure produced by ``Entrez.read`` for the XML response.
    """
    # NOTE(review): this overwrites Entrez.email with a placeholder address.
    Entrez.email = 'A.N.Other@example.com'
    search = Entrez.efetch(id=taxid, db="taxonomy", retmode="xml")
    try:
        return Entrez.read(search)
    finally:
        # Release the connection whether or not parsing succeeded.
        search.close()
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:7,代码来源:handleGB.py

示例10: fetSeqFromNCBI

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fetSeqFromNCBI(self, id_array):
    """Download the given nucleotide records in batches and hand them on.

    Records are requested 20 at a time in ``self.rettype`` format. After
    each batch a percentage is emitted on ``progressDiologSig``. When
    ``self.rettype`` is ``"gb"`` the combined text is emitted on
    ``inputContentSig`` together with ``self.outputPath``; otherwise it is
    written to ``self.fasta_file_name`` inside ``self.outputPath`` and
    ``fastaDownloadFinishedSig`` is emitted.

    Args:
        id_array: Sequence of record IDs, passed whole as ``id`` to every
            request; ``retstart``/``retmax`` select the batch.

    Returns:
        None. Returns early, without emitting a result, as soon as
        ``self.interrupt`` is set.
    """
    batch_size = 20
    count = len(id_array)
    chunks = []
    for start in range(0, count, batch_size):
        if self.interrupt:
            return
        end = min(count, start + batch_size)
        print("Going to download record %i to %i" % (start + 1, end))
        Entrez.email = self.email if self.email else "A.N.Other@example.com"
        # end - start equals the batch size, shortened for the final batch.
        fetch_handle = Entrez.efetch(db="nucleotide", rettype=self.rettype, retmode="text",
                                     retstart=start, retmax=end - start, id=id_array)
        try:
            chunks.append(fetch_handle.read())
        finally:
            fetch_handle.close()
        self.progressDiologSig.emit(end * 100 / count)
    download_contents = "".join(chunks)
    if self.rettype == "gb":
        self.inputContentSig.emit(
            download_contents, self.outputPath)
    else:
        with open(self.outputPath + os.sep + self.fasta_file_name, "w", encoding="utf-8") as f:
            f.write(download_contents)
        self.fastaDownloadFinishedSig.emit(self.outputPath)
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:31,代码来源:Lg_addFiles.py

示例11: downloadSeq

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def downloadSeq(self):
    """Download sequences in batches of 20 into ``self.download_contents``.

    When ``self.NCBI_model.list_checked`` is non-empty only those IDs are
    fetched; otherwise all ``self.count`` hits of the stored search
    (``self.webenv`` / ``self.query_key``) are fetched. After each batch a
    percentage is emitted on ``progressDiologSig``. The loop stops early
    when ``self.interrupt`` is set. Any exception is reported as a formatted
    traceback on ``exception_signal`` instead of being raised.
    """
    try:
        checked_ids = self.NCBI_model.list_checked
        batch_size = 20
        count = len(checked_ids) if checked_ids else self.count
        self.download_contents = ""
        for start in range(0, count, batch_size):
            if self.interrupt:
                return
            end = min(count, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            # end - start equals the batch size, shortened for the final batch.
            request = dict(db=self.database, rettype=self.rettype, retmode="text",
                           retstart=start, retmax=end - start)
            if checked_ids:
                request["id"] = checked_ids
            else:
                # Mode: download every sequence of the stored search.
                request["webenv"] = self.webenv
                request["query_key"] = self.query_key
            fetch_handle = Entrez.efetch(**request)
            try:
                self.download_contents += fetch_handle.read()
            finally:
                fetch_handle.close()
            self.progressDiologSig.emit(end * 100 / count)
    except:
        # Catch-all kept on purpose: the traceback is forwarded, not hidden.
        self.exception_signal.emit(''.join(
            traceback.format_exception(
                *sys.exc_info())))
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:34,代码来源:Lg_SerhNCBI.py

示例12: addition_search

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def addition_search(self):
    """Fetch further search hits in batches of 20 and emit one row per hit.

    Hits from ``self.exist_base`` up to the smaller of
    ``self.display_items`` and ``self.count`` are requested as XML from the
    stored search (``self.webenv`` / ``self.query_key``). For each record a
    list of stringified field values (``"N/A"`` for missing fields) is
    emitted on ``updateSig``, followed by a percentage on
    ``progressBarSig``. ``searchSig`` receives ``"fetching"`` at the start
    and ``"finished"`` at the end (also after an interrupt). On any
    exception it receives ``"except"`` and the formatted traceback goes to
    ``exception_signal``.
    """
    fields = ("GBSeq_accession-version", "GBSeq_definition", "GBSeq_organism", "GBSeq_length",
              "GBSeq_update-date",
              "GBSeq_taxonomy", "GBSeq_create-date", "GBSeq_moltype", "GBSeq_topology", "GBSeq_references",
              "GBSeq_source", "GBSeq_keywords", "GBSeq_project", "GBSeq_other-seqids", "GBSeq_strandedness",
              "GBSeq_comment")
    try:
        total_displayed = min(self.display_items, self.count)
        batch_size = 20
        self.searchSig.emit("fetching")
        for start in range(self.exist_base, total_displayed, batch_size):
            if self.interrupt:
                break
            end = min(total_displayed, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            # end - start equals the batch size, shortened for the final batch.
            fetch_handle = Entrez.efetch(db=self.database, retmode="xml",
                                         retstart=start, retmax=end - start,
                                         webenv=self.webenv, query_key=self.query_key)
            try:
                fetch_records = Entrez.read(fetch_handle)
            finally:
                # Close even when parsing fails; the records are already
                # in memory once read() has returned.
                fetch_handle.close()
            for num, record in enumerate(fetch_records):
                row = [str(record[field]) if field in record else "N/A"
                       for field in fields]
                self.updateSig.emit(row)
                self.progressBarSig.emit((start - self.exist_base + num + 1) * 100 / (total_displayed - self.exist_base))
        self.searchSig.emit("finished")
    except:
        # Catch-all kept on purpose: the traceback is forwarded, not hidden.
        self.searchSig.emit("except")
        self.exception_signal.emit(''.join(
            traceback.format_exception(
                *sys.exc_info())))
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:41,代码来源:Lg_SerhNCBI.py

示例13: eutilsToFile

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def eutilsToFile(db,id,filename):
    """Fetch a record from Entrez as GenBank XML and save it as UTF-8.

    Args:
        db: Database name passed to ``Entrez.efetch``.
        id: Record identifier(s) passed to ``Entrez.efetch``.
        filename: Path of the file to (over)write.
    """
    Entrez.email = "jlever@bcgsc.ca"     # Always tell NCBI who you are
    handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="xml")
    try:
        # Download before opening the output, so a failed request does not
        # leave a truncated file behind.
        xml = handle.read()
    finally:
        handle.close()
    if isinstance(xml, bytes):
        # XML handles may be binary; the codecs writer needs text.
        xml = xml.decode('utf-8')
    with codecs.open(filename,'w','utf-8') as f:
        f.write(xml)
开发者ID:jakelever,项目名称:pubrunner,代码行数:8,代码来源:pubrun.py

示例14: fastaRecordFromId_remote

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fastaRecordFromId_remote( self, id ):
    """
    experimental: fetch fasta records from remote database

    Requests ``id`` from the Entrez ``protein`` database as FASTA, parses
    the response with ``SeqIO.read`` and returns the record with its ``id``
    attribute set to ``str(id)``.
    """
    from Bio import Entrez
    from Bio import SeqIO
    Entrez.email = "A.N.Other@example.com"
    if self.verbose: self.log.add_nobreak( 'r' )
    handle = Entrez.efetch(db="protein", rettype="fasta", id=id)
    try:
        frecord = SeqIO.read(handle, "fasta")
    finally:
        # Close the response even when parsing raises.
        handle.close()
    frecord.id = str(id)
    return frecord
开发者ID:graik,项目名称:biskit,代码行数:15,代码来源:SequenceSearcher.py

示例15: download

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def download(self, ids):
    """
    Fetch data from the Entrez Taxonomy database for the Taxonomy IDs in
    ids and store the parsed result in ``self.dataset``.
    """
    handle = Entrez.efetch(db="taxonomy",
                           id=ids, retmode="xml", retmax=1000000)
    try:
        self.dataset = Entrez.read(handle)
    finally:
        # Release the connection whether or not parsing succeeded.
        handle.close()
开发者ID:CGATOxford,项目名称:CGATPipelines,代码行数:11,代码来源:PipelineGeneInfo.py


注:本文中的Bio.Entrez.efetch方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。