当前位置: 首页>>代码示例>>Python>>正文


Python Entrez.read方法代码示例

本文整理汇总了Python中Bio.Entrez.read方法的典型用法代码示例。如果您正苦于以下问题:Python Entrez.read方法的具体用法?Python Entrez.read怎么用?Python Entrez.read使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.Entrez的用法示例。


在下文中一共展示了Entrez.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fetch_chrom_name

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def fetch_chrom_name(id):
    """Translate an NCBI ``NC_`` accession into a chromosome name.

    Identifiers that do not start with ``NC_`` are returned unchanged.
    For ``NC_`` accessions the GenBank record is fetched from the
    ``nuccore`` database and the qualifiers of its first feature are
    inspected: an ``organelle`` qualifier equal to ``"mitochondrion"``
    yields ``"MtDNA"``; otherwise the ``chromosome`` qualifier is returned.

    The lookup is best-effort: on any failure (network error, unexpected
    record layout, non-string ``id``) or when no chromosome qualifier is
    present, the original ``id`` is returned.
    """
    try:
        if not id.startswith("NC_"):
            return id
        Entrez.email = "vcf-kit@vcf-kit.com"
        handle = Entrez.efetch(db="nuccore", id=id, rettype="gb", retmode="xml")
        try:
            chrom = Entrez.read(handle)
        finally:
            # Release the network handle even if parsing fails.
            handle.close()
        gb_feature_quals = chrom[0]["GBSeq_feature-table"][0]["GBFeature_quals"]
        # Key the qualifiers by name explicitly rather than relying on the
        # positional order of each qualifier's values; qualifiers without a
        # value map to None instead of aborting the whole lookup.
        features = {
            qual["GBQualifier_name"]: qual.get("GBQualifier_value")
            for qual in gb_feature_quals
        }
        if features.get("organelle") == "mitochondrion":
            return "MtDNA"
        # Other organelles used to fall through and return None implicitly;
        # fall back to the chromosome qualifier, then to the accession itself.
        return features.get("chromosome", id)
    except Exception:
        return id
开发者ID:AndersenLab,项目名称:VCF-kit,代码行数:18,代码来源:genome.py

示例2: get_species_from_taxid

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_species_from_taxid(self, taxid):
    """Return a species name with only its first letter capitalised.

    If ``taxid`` consists solely of digits it is looked up in the NCBI
    ``taxonomy`` database and replaced by the record's ``ScientificName``.
    Any other string is treated as a species name already. When the lookup
    raises ``IOError`` the error is logged and the taxid string itself is
    formatted and returned.

    :param taxid: str, a numeric taxid or a species name
    :return: str, first character upper-cased and the rest lower-cased
             (an empty input yields an empty string)
    """
    # If it is an integer (a taxid), try to get the species name.
    species = taxid
    if taxid.isdigit():
        Entrez.email = EMBL.PREVIOUS_VALUES["email"]
        # Fetch the classification using the taxid.
        logging.debug("Fetch The Lineage using Entrez.efetch")
        try:
            search = Entrez.efetch(id=taxid, db="taxonomy", retmode="xml")
            try:
                data = Entrez.read(search)
            finally:
                # Release the network handle even if parsing fails.
                search.close()
            species = data[0]['ScientificName']
        except IOError as e:
            logging.error("Could not get species from taxid: %s" % e)

    # Slice instead of indexing so an empty name does not raise IndexError.
    return "%s%s" % (species[:1].upper(), species[1:].lower())

    # If species is a taxid, replace it with the species name
开发者ID:NBISweden,项目名称:EMBLmyGFF3,代码行数:19,代码来源:EMBLmyGFF3.py

示例3: get_taxid_from_species

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_taxid_from_species(self, species):
    """Return the NCBI taxid for a species name.

    A ``species`` string made only of digits is assumed to be a taxid
    already and is returned unchanged. Otherwise the name is searched in
    the NCBI ``taxonomy`` database with ``Entrez.esearch``.

    :param species: str, a species name or a numeric taxid
    :return: the first id of the search result; ``None`` when the search
             returns no id (a warning is logged); the original ``species``
             value when the lookup raises ``IOError`` (an error is logged)
    """
    # If it is a species name, try to get the taxid.
    taxid = species
    if not species.isdigit():
        Entrez.email = EMBL.PREVIOUS_VALUES["email"]
        # Fetch the taxid from the NCBI taxonomy.
        logging.debug("Fetch the taxid from species name using Entrez.esearch")
        # Strip before substituting: stripping afterwards is a no-op for
        # spaces, so leading/trailing blanks ended up as stray '+' signs.
        species = species.strip().replace(" ", "+")
        try:
            search = Entrez.esearch(term=species, db="taxonomy", retmode="xml")
            try:
                record = Entrez.read(search)
            finally:
                # Release the network handle even if parsing fails.
                search.close()
            if not record['IdList']:  # no taxid found
                logging.warning("Please verify the species name. '%s' species is unknown into the NCBI taxonomy databse. Impossible to check the taxonomic classification. We will use the default value 'Life' to populate the OC line.",self.species)
                taxid = None
            else:
                taxid = record['IdList'][0]
        except IOError as e:
            logging.error("Could not get taxid from species: %s" % e)

    return taxid
开发者ID:NBISweden,项目名称:EMBLmyGFF3,代码行数:22,代码来源:EMBLmyGFF3.py

示例4: AS

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def AS(self):
    """
    Render the AS (ASsembly Information) lines of the entry.

    AS lines describe the composition of a TPA or TSA sequence: the local
    sequence spans shown in the entry plus the identifiers and base spans
    of the contributing primary sequences (ENA primary entries only).

    One "AS" record is produced per item of ``self.assembly_information``,
    built from these keys of the item:

    a) local_span      base span on the local sequence shown in the entry
                       (padded to 16 characters)
    b) identifier      acc.version of the contributing ENA sequence(s), or
                       trace identifier for ENA read(s) (padded to 24)
    c) primary_span    base span on the contributing ENA primary sequence,
                       or not_available for ENA read(s) (padded to 18)
    d) complementary   'c' marks a contributing sequence that originates
                       from the complementary strand in the primary entry

    The records are concatenated as-is; an empty list gives "".
    """
    return "".join(
        "AS   %s%s%s%s" % (
            "{:16}".format(entry['local_span']),
            "{:24}".format(entry['identifier']),
            "{:18}".format(entry['primary_span']),
            entry['complementary'],
        )
        for entry in self.assembly_information
    )
开发者ID:NBISweden,项目名称:EMBLmyGFF3,代码行数:27,代码来源:EMBLmyGFF3.py

示例5: get_GIs

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_GIs(self, accessions, n_entrez=2500, **kwargs):
    '''
    Use entrez esearch to get genbank identifiers from accession numbers.

    The accessions are queried against ``self.gbdb`` in chunks of
    ``n_entrez`` (each chunk is sent as one space-separated search term)
    and the ``IdList`` of every response is concatenated.

    :param accessions: sequence of accession strings
    :param n_entrez: int, number of accessions per esearch request
    :param kwargs: accepted for call compatibility; not used here
    :return: list of ids, in the order the responses were received
    '''
    retmax = 10**5  # max records to retrieve at once; 10^5 is documented limit, but >2500 reproducibly throws errors

    # Split the accessions into n_entrez-long portions, each joined into a
    # ' ' separated search term. ``range`` replaces the Python-2-only
    # ``xrange`` and already yields the offsets in ascending order.
    queries = [
        " ".join(accessions[i:i + n_entrez])
        for i in range(0, len(accessions), n_entrez)
    ]

    assert sum(len(q.split()) for q in queries) == len(accessions)  # sanity check

    giList = []
    for q in queries:
        handle = Entrez.esearch(db=self.gbdb, term=q, retmax=retmax)    # retrieve xml of search results
        try:
            giList += Entrez.read(handle)['IdList']  # pull GI numbers from handle
        finally:
            # Release the network handle even if parsing fails.
            handle.close()
    return giList
开发者ID:nextstrain,项目名称:fauna,代码行数:19,代码来源:parse.py

示例6: get_tax_names

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_tax_names(taxa):
    """Get tax names from ids or string.

    ``taxa`` is either the path of an existing file whose lines hold
    comma-separated entries, or a comma-separated string itself. Entries
    made only of digits are looked up in the NCBI ``taxonomy`` database and
    replaced by the record's ``ScientificName`` (dropped when the lookup
    returns nothing or no name); all other entries are kept verbatim.

    :param taxa: str, file path or comma-separated list
    :return: set of tax names
    """

    logging.debug('Checking tax inputs')

    def splitter(s):
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        return re.split(r'\s*,\s*', s)

    tax_ids = []
    if os.path.isfile(taxa):
        # Context manager so the input file is closed deterministically.
        with open(taxa) as fh:
            for line in fh:
                tax_ids.extend(splitter(line.rstrip()))
    else:
        tax_ids = splitter(taxa)

    tax_names = []
    for tax in tax_ids:
        logging.debug('Tax {}'.format(tax))

        if tax.isdigit():
            handle = Entrez.efetch(db='taxonomy', id=tax)
            try:
                results = Entrez.read(handle)
            finally:
                # Release the network handle even if parsing fails.
                handle.close()
            if results:
                name = results[0].get('ScientificName')
                if name:
                    tax_names.append(name)
        else:
            tax_names.append(tax)

    return set(tax_names)


# -------------------------------------------------- 
开发者ID:kyclark,项目名称:bioinformatics_primer,代码行数:35,代码来源:solution.py

示例7: entrez_batch_webhistory

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def entrez_batch_webhistory(args, record, expected, batchsize, *fnargs, **fnkwargs):
    """Collect Entrez results for an earlier NCBI webhistory search.

    :param args:  Namespace, command-line arguments
    :param record:  Entrez webhistory record; its "WebEnv" and "QueryKey"
        entries identify the stored search
    :param expected:  int, number of expected search returns
    :param batchsize:  int, number of search returns to retrieve per batch
    :param *fnargs:  tuple, extra positional arguments for Efetch
    :param **fnkwargs:  dict, extra keyword arguments for Efetch

    Efetch is invoked through entrez_retry once per batch, starting at
    offsets 0, batchsize, 2*batchsize, ... below ``expected``. Each
    response is parsed without validation and all parsed items are
    returned together as one list.
    """
    logger = logging.getLogger(__name__)

    collected = []
    for offset in range(0, expected, batchsize):
        handle = entrez_retry(
            args,
            logger,
            Entrez.efetch,
            *fnargs,
            retstart=offset,
            retmax=batchsize,
            webenv=record["WebEnv"],
            query_key=record["QueryKey"],
            **fnkwargs,
        )
        collected.extend(Entrez.read(handle, validate=False))
    return collected


# Get assembly UIDs for the root taxon 
开发者ID:widdowquinn,项目名称:pyani,代码行数:36,代码来源:genbank_get_genomes_by_taxon.py

示例8: get_asm_uids

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_asm_uids(args, taxon_uid):
    """Return the NCBI assembly UIDs associated with the passed taxon.

    :param args:  Namespace, command-line arguments
    :param taxon_uid:  str, NCBI taxon ID

    This query at NCBI returns all assemblies for the taxon subtree
    rooted at the passed taxon_uid. The result is whatever
    entrez_batch_webhistory returns for the search (a list of the parsed
    Efetch results); no de-duplication is performed here.
    """
    logger = logging.getLogger(__name__)

    query = f"txid{taxon_uid}[Organism:exp]"
    logger.info("Entrez ESearch with query: %s", query)

    # Perform initial search for assembly UIDs with taxon ID as query.
    # Use NCBI history for the search.
    handle = entrez_retry(
        args,
        logger,
        Entrez.esearch,
        db="assembly",
        term=query,
        format="xml",
        usehistory="y",
    )
    record = Entrez.read(handle, validate=False)
    result_count = int(record["Count"])
    logger.info("Entrez ESearch returns %d assembly IDs", result_count)

    # Recover assembly UIDs from the web history.
    # entrez_batch_webhistory takes (args, record, expected, batchsize, ...)
    # and creates its own logger, so no logger is passed: an extra
    # positional argument would shift every following parameter by one.
    asm_ids = entrez_batch_webhistory(
        args, record, result_count, 250, db="assembly", retmode="xml"
    )
    logger.info("Identified %d unique assemblies", len(asm_ids))
    return asm_ids


# Extract filestem from Entrez eSummary 
开发者ID:widdowquinn,项目名称:pyani,代码行数:40,代码来源:genbank_get_genomes_by_taxon.py

示例9: fetchCBS

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def fetchCBS(self):
        """Run the external program at ``self.codonW`` on ``self.seq`` and return 8 values.

        The sequence is written to a FASTA file inside ``self.path``, the
        command is launched through ``self.factory.init_popen`` and the
        second line of the tab-separated output file is parsed.

        Returns a list of 8 strings: fields 5-8 followed by fields 11-14 of
        that line, with every entry that is neither a float nor an int
        (per ``self.is_float`` / ``self.is_int``) replaced by "NA". A list
        of eight "NA" is returned when ``self.codeTable`` has no mapping or
        when no usable output was produced.

        Side effects: changes the process working directory to
        ``self.path`` and appends the command and sequence to
        "codonW_error.fas" on failure.
        """
        # Maps str(self.codeTable) to the value passed after "-code".
        # NOTE(review): presumably NCBI genetic-code table -> CodonW code
        # number; confirm against the CodonW documentation.
        dict_ = {"1":"0", "2":"1", "5":"4", "9":"7"}
        out = ["NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA"]
        if str(self.codeTable) not in dict_: return out  ## remember to change this
        code = dict_[str(self.codeTable)]
        # The file names below are relative, so the command and the error
        # file resolve against this directory.
        os.chdir(self.path)
        infile = "codonW_infile.fas"
        outfile = "codonW_outfile.txt"
        blkfile = "codonW_blk.txt"
        errorfile = "codonW_error.fas"
        with open(self.path + os.sep + infile, "w", encoding="utf-8") as f:
            f.write(">seq\n%s\n"%self.seq)
        command = '"%s" "%s" "%s" "%s" -all_indices -nomenu -silent -noblk -code %s'%(self.codonW, infile, outfile, blkfile, code)
        # print(command)
        popen = self.factory.init_popen(command)
        # Drain stdout until the process has exited; the text read is discarded.
        try:
            while True:
                try:
                    out_line = popen.stdout.readline().decode("utf-8", errors='ignore')
                except UnicodeDecodeError:
                    # NOTE(review): with errors='ignore' the utf-8 decode above
                    # should not raise UnicodeDecodeError, so this fallback
                    # looks unreachable - confirm.
                    out_line = popen.stdout.readline().decode("gbk", errors='ignore')
                if out_line == "" and popen.poll() is not None:
                    break
        except: pass
        ## Read the output results
        if not os.path.exists(self.path + os.sep + outfile):
            # No output file: record the failing command and sequence.
            with open(errorfile, "a", encoding="utf-8") as f2:
                f2.write(command + "\n" + self.seq + "\n")
            # print("error seq.:", self.seq)
        else:
            with open(self.path + os.sep + outfile, encoding="utf-8", errors="ignore") as f1:
                content = f1.read()
            try:
                # Second line, tab-separated; IndexError occurs only when the
                # file has fewer than two lines (slices never raise).
                list_ = content.split("\n")[1].split("\t")
                out = list_[5:9] + list_[11:15]
            except IndexError:
                with open(errorfile, "a", encoding="utf-8") as f2:
                    f2.write(command + "\n" + self.seq + "\n")
        # Normalise every non-numeric entry to "NA".
        for num, i in enumerate(out):
            if (not self.is_float(i)) and (not self.is_int(i)): out[num] = "NA"
        return out
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:43,代码来源:handleGB.py

示例10: merge_file_contents

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def merge_file_contents(self, files, base=None, proportion=None, processSig=None):
    """Concatenate the text of ``files`` in order and return it.

    Each file is read as UTF-8 with undecodable bytes ignored. When
    ``processSig`` is truthy, ``base + k*proportion/len(files)`` is emitted
    through ``processSig.emit`` after the k-th file (1-based) is read.
    """
    chunks = []
    for position, path in enumerate(files, start=1):
        with open(path, encoding="utf-8", errors='ignore') as handle:
            chunks.append(handle.read())
        if processSig:
            processSig.emit(base + position*proportion/len(files))
    return "".join(chunks)
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:10,代码来源:handleGB.py

示例11: fetchContentsByIDs

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def fetchContentsByIDs(self, IDs, base=None, proportion=None, processSig=None):
    """Return the concatenated text of the record files behind ``IDs``.

    Every ID is resolved to a path with ``self.fetchRecordPath`` and the
    file is read as UTF-8 with undecodable bytes ignored. When
    ``processSig`` is truthy, ``base + k*proportion/len(IDs)`` is emitted
    through ``processSig.emit`` after the k-th record (1-based) is read.
    """
    pieces = []
    for position, ID in enumerate(IDs, start=1):
        record_path = self.fetchRecordPath(ID)
        with open(record_path, encoding="utf-8", errors='ignore') as handle:
            pieces.append(handle.read())
        if processSig:
            processSig.emit(base + position*proportion/len(IDs))
    return "".join(pieces)

    # def fetchIDsByContents(self, contents):
    #     '''Note: this ID is the LOCUS ID'''
    #     rgx = re.compile(r"(?sm)LOCUS {7}(\S+).+?^//\s*?(?=LOCUS|$)")
    #     return rgx.findall(contents) 
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:16,代码来源:handleGB.py

示例12: get_tax_id

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_tax_id(self, query_name):
    """Return the NCBI taxid for a species name, or None if none is found.

    To get data from the NCBI taxonomy we need the taxid; it is obtained by
    passing the species name to esearch, which returns the tax id.

    :param query_name: str, species name to search for
    :return: the first entry of the search result's ``IdList``, or ``None``
             when the list is empty
    """
    # Strip before substituting: stripping afterwards is a no-op for
    # spaces, so leading/trailing blanks ended up as stray '+' signs.
    query_name = query_name.strip().replace(' ', "+")
    # NOTE(review): this looks like a placeholder contact address - confirm.
    Entrez.email = 'A.N.Other@example.com'
    search = Entrez.esearch(term=query_name, db="taxonomy", retmode="xml")
    try:
        record = Entrez.read(search)
    finally:
        # Release the network handle even if parsing fails.
        search.close()
    return record['IdList'][0] if record['IdList'] else None
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:11,代码来源:handleGB.py

示例13: downloadSeq

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def downloadSeq(self):
    """Download records in batches of 20 into ``self.download_contents``.

    When ``self.NCBI_model.list_checked`` is non-empty those ids are
    requested; otherwise all ``self.count`` records of the stored search
    (``self.webenv`` / ``self.query_key``) are requested. After each batch
    the percentage done is emitted through ``self.progressDiologSig``.

    The method returns early, without any notification, as soon as
    ``self.interrupt`` is true. Any exception is reported as a formatted
    traceback through ``self.exception_signal`` instead of being raised.
    """
    try:
        checked_ids = self.NCBI_model.list_checked
        batch_size = 20
        count = len(checked_ids) if checked_ids else self.count
        self.download_contents = ""
        for start in range(0, count, batch_size):
            if self.interrupt:
                return
            end = min(count, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            # The last batch may be shorter than batch_size.
            request = dict(db=self.database, rettype=self.rettype, retmode="text",
                           retstart=start, retmax=end - start)
            if checked_ids:
                request["id"] = checked_ids
            else:
                # Mode: download every sequence of the stored search.
                request["webenv"] = self.webenv
                request["query_key"] = self.query_key
            fetch_handle = Entrez.efetch(**request)
            try:
                self.download_contents += fetch_handle.read()
            finally:
                # Release the network handle after every batch, even when
                # reading fails.
                fetch_handle.close()
            self.progressDiologSig.emit(end * 100 / count)
    except Exception:
        # Boundary handler: forward the traceback to the listener.
        self.exception_signal.emit(''.join(
            traceback.format_exception(
                *sys.exc_info())))
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:34,代码来源:Lg_SerhNCBI.py

示例14: addition_search

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def addition_search(self):
    """Fetch further search results in batches of 20 and publish them.

    Records from offset ``self.exist_base`` up to the smaller of
    ``self.display_items`` and ``self.count`` are fetched as XML from the
    stored search (``self.webenv`` / ``self.query_key``). For every record
    a list with the string form of a fixed set of GBSeq fields ("N/A" for
    missing ones) is emitted through ``self.updateSig``, followed by the
    percentage done through ``self.progressBarSig``.

    ``self.searchSig`` receives "fetching" at the start and "finished" at
    the end (also when ``self.interrupt`` stops the loop early). On any
    exception it receives "except" and the formatted traceback is emitted
    through ``self.exception_signal``.
    """
    fields = (
        "GBSeq_accession-version", "GBSeq_definition", "GBSeq_organism", "GBSeq_length",
        "GBSeq_update-date",
        "GBSeq_taxonomy", "GBSeq_create-date", "GBSeq_moltype", "GBSeq_topology", "GBSeq_references",
        "GBSeq_source", "GBSeq_keywords", "GBSeq_project", "GBSeq_other-seqids", "GBSeq_strandedness",
        "GBSeq_comment",
    )
    try:
        total_displayed = self.display_items
        if self.count < total_displayed:
            total_displayed = self.count
        batch_size = 20
        self.searchSig.emit("fetching")
        for start in range(self.exist_base, total_displayed, batch_size):
            if self.interrupt:
                break
            end = min(total_displayed, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            # The last batch may be shorter than batch_size.
            fetch_handle = Entrez.efetch(db=self.database, retmode="xml",
                                         retstart=start, retmax=end - start,
                                         webenv=self.webenv, query_key=self.query_key)
            try:
                fetch_records = Entrez.read(fetch_handle)
            finally:
                # Close in ``finally`` so the handle is released even when
                # parsing raises.
                fetch_handle.close()
            for num, record in enumerate(fetch_records):
                list_ = [str(record[key]) if key in record else "N/A" for key in fields]
                self.updateSig.emit(list_)
                self.progressBarSig.emit((start - self.exist_base + num + 1) * 100 / (total_displayed - self.exist_base))
        self.searchSig.emit("finished")
    except Exception:
        # Boundary handler: tell the listener and forward the traceback.
        self.searchSig.emit("except")
        self.exception_signal.emit(''.join(
            traceback.format_exception(
                *sys.exc_info())))
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:41,代码来源:Lg_SerhNCBI.py

示例15: get_taxid_mapping_for_batch

# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_taxid_mapping_for_batch(taxids, taxid2wikidict, mutex, semaphore, max_attempt=3):
    ''' Get wiki mapping for a list of taxids.

    Queries Entrez ELink (``cmd="llinks"``) for the comma-joined taxids and
    records, for every returned id, the first link URL that contains
    "wikipedia.org" ("" when there is none). The query is retried up to
    ``max_attempt`` times, waiting 5 seconds between attempts.

    :param taxids: iterable of taxid strings
    :param taxid2wikidict: dict updated in place (while holding ``mutex``)
        with the id -> url mapping; left unchanged if every attempt fails
    :param mutex: context manager guarding ``taxid2wikidict``
    :param semaphore: released once the fetch phase is over
        (presumably acquired by the caller to bound concurrency - confirm)
    :param max_attempt: int, maximum number of ELink attempts
    '''
    taxid_str = ",".join(taxids)
    log.write(f"fetching batch {taxid_str}")
    # Bound up front so the update below is safe when every attempt fails
    # (previously that case raised UnboundLocalError).
    parsed = {}
    try:
        for attempt in range(max_attempt):
            try:
                handle = Entrez.elink(dbfrom="taxonomy", id=taxid_str, cmd="llinks")
                try:
                    record = Entrez.read(handle)
                finally:
                    # Release the network handle even if parsing fails.
                    handle.close()

                # Build into a scratch dict so a failure half-way through
                # never leaks a partial mapping.
                batch = {}
                results = record[0]['IdUrlList']['IdUrlSet']
                for result in results:
                    taxid = result['Id']
                    wikiurl = ""
                    for link in result['ObjUrl']:
                        url = str(link['Url'])
                        # Plain substring test; the former regex left the
                        # dot unescaped.
                        if 'wikipedia.org' in url:
                            wikiurl = url
                            break
                    batch[taxid] = wikiurl
                parsed = batch
                break
            except Exception:
                log.write(f"failed batch attempt {attempt}")
                # No point in waiting once the attempts are exhausted.
                if attempt + 1 < max_attempt:
                    time.sleep(5)
    finally:
        semaphore.release()
    with mutex:
        taxid2wikidict.update(parsed)
开发者ID:chanzuckerberg,项目名称:idseq-dag,代码行数:30,代码来源:fetch_tax_info.py


注:本文中的Bio.Entrez.read方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。