当前位置: 首页>>代码示例>>Python>>正文


Python Model.addXref方法代码示例

本文整理汇总了Python中dipper.models.Model.Model.addXref方法的典型用法代码示例。如果您正苦于以下问题:Python Model.addXref方法的具体用法?Python Model.addXref怎么用?Python Model.addXref使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dipper.models.Model.Model的用法示例。


在下文中一共展示了Model.addXref方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _add_gene_equivalencies

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _add_gene_equivalencies(self, xrefs, gene_id, taxon):
        """
        Add equivalentClass and sameAs relationships

        Uses external resource map located in
        /resources/clique_leader.yaml to determine
        if an NCBITaxon ID space is a clique leader
        """

        clique_map = self.open_and_parse_yaml(self.resources['clique_leader'])

        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        model = Model(graph)
        filter_out = ['Vega', 'IMGT/GENE-DB', 'Araport']

        # deal with the dbxrefs
        # MIM:614444|HGNC:HGNC:16851|Ensembl:ENSG00000136828|HPRD:11479|Vega:OTTHUMG00000020696

        for dbxref in xrefs.strip().split('|'):
            prefix = ':'.join(dbxref.split(':')[:-1]).strip()
            if prefix in self.localtt:
                prefix = self.localtt[prefix]
            dbxref_curie = ':'.join((prefix, dbxref.split(':')[-1]))

            if dbxref_curie is not None and prefix != '':
                if prefix == 'HPRD':  # proteins are not == genes.
                    model.addTriple(
                        gene_id, self.globaltt['has gene product'], dbxref_curie)
                    continue
                    # skip some of these for now based on curie prefix
                if prefix in filter_out:
                    continue

                if prefix == 'ENSEMBL':
                    model.addXref(gene_id, dbxref_curie)
                if prefix == 'OMIM':
                    if dbxref_curie in self.omim_replaced:
                        repl = self.omim_replaced[dbxref_curie]
                        for omim in repl:
                            if omim in self.omim_type and \
                                    self.omim_type[omim] == self.globaltt['gene']:
                                dbxref_curie = omim
                    if dbxref_curie in self.omim_type and \
                            self.omim_type[dbxref_curie] != self.globaltt['gene']:
                        continue
                try:
                    if self.class_or_indiv.get(gene_id) == 'C':
                        model.addEquivalentClass(gene_id, dbxref_curie)
                        if taxon in clique_map:
                            if clique_map[taxon] == prefix:
                                model.makeLeader(dbxref_curie)
                            elif clique_map[taxon] == gene_id.split(':')[0]:
                                model.makeLeader(gene_id)
                    else:
                        model.addSameIndividual(gene_id, dbxref_curie)
                except AssertionError as err:
                    LOG.warning("Error parsing %s: %s", gene_id, err)
开发者ID:monarch-initiative,项目名称:dipper,代码行数:62,代码来源:NCBIGene.py

示例2: _get_mappedids

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _get_mappedids(self, entry, g):
        """
        Extract the Orphanet and UMLS ids as equivalences from the entry
        :param entry:
        :return:
        """
        model = Model(g)
        omimid = 'OMIM:'+str(entry['mimNumber'])
        orpha_mappings = []
        if 'externalLinks' in entry:
            links = entry['externalLinks']
            if 'orphanetDiseases' in links:
                # triple semi-colon delimited list of
                # double semi-colon delimited orphanet ID/disease pairs
                # 2970;;566;;Prune belly syndrome
                items = links['orphanetDiseases'].split(';;;')
                for i in items:
                    # note 'internal_num unused
                    (orpha_num, internal_num, orpha_label) = i.split(';;')
                    orpha_id = 'Orphanet:'+orpha_num.strip()
                    orpha_mappings.append(orpha_id)
                    model.addClassToGraph(orpha_id, orpha_label.strip())
                    model.addXref(omimid, orpha_id)

            if 'umlsIDs' in links:
                umls_mappings = links['umlsIDs'].split(',')
                for i in umls_mappings:
                    umls_id = 'UMLS:'+i
                    model.addClassToGraph(umls_id, None)
                    model.addXref(omimid, umls_id)

        return
开发者ID:kshefchek,项目名称:dipper,代码行数:34,代码来源:OMIM.py

示例3: _process_straininfo

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _process_straininfo(self, limit):
        # line_counter = 0  # TODO unused
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        model = Model(g)

        logger.info("Processing measurements ...")
        raw = '/'.join((self.rawdir, self.files['straininfo']['file']))

        tax_id = 'NCBITaxon:10090'

        with open(raw, 'r') as f:
            reader = csv.reader(f, delimiter=',', quotechar='\"')
            self.check_header(self.files['straininfo']['file'], f.readline())
            for row in reader:
                (strain_name, vendor, stocknum, panel, mpd_strainid,
                 straintype, n_proj, n_snp_datasets, mpdshortname, url) = row
                # C57BL/6J,J,000664,,7,IN,225,17,,http://jaxmice.jax.org/strain/000664.html
                # create the strain as an instance of the taxon
                if self.testMode and \
                        'MPD:' + str(mpd_strainid) not in self.test_ids:
                    continue
                strain_id = 'MPD-strain:' + str(mpd_strainid)
                model.addIndividualToGraph(strain_id, strain_name, tax_id)
                if mpdshortname.strip() != '':
                    model.addSynonym(strain_id, mpdshortname.strip())
                self.idlabel_hash[strain_id] = strain_name
                # make it equivalent to the vendor+stock
                if stocknum != '':
                    if vendor == 'J':
                        jax_id = 'JAX:'+stocknum
                        model.addSameIndividual(strain_id, jax_id)
                    elif vendor == 'Rbrc':
                        # reiken
                        reiken_id = 'RBRC:'+re.sub(r'RBRC', '', stocknum)
                        model.addSameIndividual(strain_id, reiken_id)
                    else:
                        if url != '':
                            model.addXref(strain_id, url, True)
                        if vendor != '':
                            model.addXref(
                                strain_id, ':'.join((vendor, stocknum)),
                                True)

                # add the panel information
                if panel != '':
                    desc = panel+' [panel]'
                    model.addDescription(strain_id, desc)

                # TODO make the panels as a resource collection

        return
开发者ID:DoctorBud,项目名称:dipper,代码行数:56,代码来源:MPD.py

示例4: _process_trait_mappings

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _process_trait_mappings(self, raw, limit=None):
        """
        This method mapps traits from/to ...

        Triples created:

        :param limit:
        :return:
        """
        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        line_counter = 0
        model = Model(graph)

        with open(raw, 'r') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            next(filereader, None)  # skip header line
            for row in filereader:
                line_counter += 1
                # need to skip the last line
                if len(row) < 8:
                    LOG.info("skipping line %d: %s", line_counter, '\t'.join(row))
                    continue
                (vto_id, pto_id, cmo_id, ato_column, species, trait_class,
                 trait_type, qtl_count) = row

                ato_id = re.sub(
                    r'ATO #', 'AQTLTrait:', re.sub(
                        r'\].*', '', re.sub(r'\[', '', ato_column)))
                ato_id = ato_id.strip()

                ato_label = re.sub(r'.*\]\s*', '', ato_column)

                model.addClassToGraph(ato_id, ato_label.strip())

                if re.match(r'VT:.*', vto_id):
                    model.addClassToGraph(vto_id, None)
                    model.addEquivalentClass(ato_id, vto_id)
                if re.match(r'LPT:.*', pto_id):
                    model.addClassToGraph(pto_id, None)
                    model.addXref(ato_id, pto_id)
                if re.match(r'CMO:.*', cmo_id):
                    model.addClassToGraph(cmo_id, None)
                    model.addXref(ato_id, cmo_id)

        LOG.info("Done with trait mappings")
        return
开发者ID:TomConlin,项目名称:dipper,代码行数:51,代码来源:AnimalQTLdb.py

示例5: _get_variants

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]

#.........这里部分代码省略.........
                # make the ClinVarVariant the clique leader
                model.makeLeader(seqalt_id)

                if bandinbuild_id is not None:
                    f.addSubsequenceOfFeature(bandinbuild_id)

                # CHECK - this makes the assumption that there is
                # only one affected chromosome per variant what happens with
                # chromosomal rearrangement variants?
                # shouldn't both chromosomes be here?

                # add the hgvs as synonyms
                if hgvs_c != '-' and hgvs_c.strip() != '':
                    model.addSynonym(seqalt_id, hgvs_c)
                if hgvs_p != '-' and hgvs_p.strip() != '':
                    model.addSynonym(seqalt_id, hgvs_p)

                # add the dbsnp and dbvar ids as equivalent
                if dbsnp_num != '-' and int(dbsnp_num) != -1:
                    dbsnp_id = 'dbSNP:rs'+str(dbsnp_num)
                    model.addIndividualToGraph(dbsnp_id, None)
                    model.addSameIndividual(seqalt_id, dbsnp_id)
                if dbvar_num != '-':
                    dbvar_id = 'dbVar:'+dbvar_num
                    model.addIndividualToGraph(dbvar_id, None)
                    model.addSameIndividual(seqalt_id, dbvar_id)

                # TODO - not sure if this is right... add as xref?
                # the rcv is like the combo of the phenotype with the variant
                if rcv_nums != '-':
                    for rcv_num in re.split(r';', rcv_nums):
                        rcv_id = 'ClinVar:' + rcv_num
                        model.addIndividualToGraph(rcv_id, None)
                        model.addXref(seqalt_id, rcv_id)

                if gene_id is not None:
                    # add the gene
                    model.addClassToGraph(gene_id, gene_symbol)
                    # make a variant locus
                    vl_id = '_'+gene_num+'-'+variant_num
                    if self.nobnodes:
                        vl_id = ':'+vl_id
                    vl_label = allele_name
                    model.addIndividualToGraph(
                        vl_id, vl_label, geno.genoparts['variant_locus'])
                    geno.addSequenceAlterationToVariantLocus(seqalt_id, vl_id)
                    geno.addAlleleOfGene(vl_id, gene_id)
                else:
                    # some basic reporting
                    gmatch = re.search(r'\(\w+\)', allele_name)
                    if gmatch is not None and len(gmatch.groups()) > 0:
                        logger.info(
                            "Gene found in allele label, but no id provided: %s",
                            gmatch.group(1))
                    elif re.match(r'more than 10', gene_symbol):
                        logger.info(
                            "More than 10 genes found; "
                            "need to process XML to fetch (variant=%d)",
                            int(variant_num))
                    else:
                        logger.info(
                            "No gene listed for variant %d",
                            int(variant_num))

                # parse the list of "phenotypes" which are diseases.
                # add them as an association
开发者ID:DoctorBud,项目名称:dipper,代码行数:70,代码来源:ClinVar.py

示例6: _get_process_allelic_variants

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _get_process_allelic_variants(self, entry, g):
        model = Model(g)
        reference = Reference(g)
        geno = Genotype(g)
        if entry is not None:
            # to hold the entry-specific publication mentions
            # for the allelic variants
            publist = {}
            entry_num = entry['mimNumber']

            # process the ref list just to get the pmids
            ref_to_pmid = self._get_pubs(entry, g)

            if 'allelicVariantList' in entry:
                allelicVariantList = entry['allelicVariantList']
                for al in allelicVariantList:
                    al_num = al['allelicVariant']['number']
                    al_id = 'OMIM:'+str(entry_num)+'.'+str(al_num).zfill(4)
                    al_label = None
                    al_description = None
                    if al['allelicVariant']['status'] == 'live':
                        publist[al_id] = set()
                        if 'mutations' in al['allelicVariant']:
                            al_label = al['allelicVariant']['mutations']
                        if 'text' in al['allelicVariant']:
                            al_description = al['allelicVariant']['text']
                            m = re.findall(r'\{(\d+)\:', al_description)
                            publist[al_id] = set(m)
                        geno.addAllele(
                            al_id, al_label, geno.genoparts['variant_locus'],
                            al_description)
                        geno.addAlleleOfGene(
                            al_id, 'OMIM:'+str(entry_num),
                            geno.object_properties[
                                'is_sequence_variant_instance_of'])
                        for r in publist[al_id]:
                            pmid = ref_to_pmid[int(r)]
                            g.addTriple(
                                pmid, model.object_properties['is_about'],
                                al_id)
                        # look up the pubmed id in the list of references
                        if 'dbSnps' in al['allelicVariant']:
                            dbsnp_ids = \
                                re.split(r',', al['allelicVariant']['dbSnps'])
                            for dnum in dbsnp_ids:
                                did = 'dbSNP:'+dnum.strip()
                                model.addIndividualToGraph(did, None)
                                model.addSameIndividual(al_id, did)
                        if 'clinvarAccessions' in al['allelicVariant']:
                            # clinvarAccessions triple semicolon delimited
                            # each >1 like RCV000020059;;;
                            rcv_ids = \
                                re.split(
                                    r';;;',
                                    al['allelicVariant']['clinvarAccessions'])
                            rcv_ids = [
                                (re.match(r'(RCV\d+);*', r)).group(1)
                                for r in rcv_ids]
                            for rnum in rcv_ids:
                                rid = 'ClinVar:'+rnum
                                model.addXref(al_id, rid)
                        reference.addPage(
                            al_id, "http://omim.org/entry/" +
                            str(entry_num)+"#" + str(al_num).zfill(4))
                    elif re.search(
                            r'moved', al['allelicVariant']['status']):
                        # for both 'moved' and 'removed'
                        moved_ids = None
                        if 'movedTo' in al['allelicVariant']:
                            moved_id = 'OMIM:'+al['allelicVariant']['movedTo']
                            moved_ids = [moved_id]
                        model.addDeprecatedIndividual(al_id, moved_ids)
                    else:
                        logger.error('Uncaught alleleic variant status %s',
                                     al['allelicVariant']['status'])
                # end loop allelicVariantList

        return
开发者ID:kshefchek,项目名称:dipper,代码行数:80,代码来源:OMIM.py

示例7: _process_qtls_genetic_location

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]

#.........这里部分代码省略.........
                            int(float(x.strip())) for x in re.split(r'-', range_cm)]
                    else:
                        LOG.info(
                            "A cM range we can't handle for QTL %s: %s",
                            qtl_id, range_cm)
                elif position_cm != '':
                    match = re.match(r'([0-9]*\.[0-9]*)', position_cm)
                    if match is not None:
                        position_cm = match.group()
                        start = stop = int(float(position_cm))

                # FIXME remove converion to int for start/stop
                # when schema can handle floats add in the genetic location
                # based on the range
                feature.addFeatureStartLocation(
                    start, chrom_in_build_id, None,
                    [self.globaltt['FuzzyPosition']])
                feature.addFeatureEndLocation(
                    stop, chrom_in_build_id, None,
                    [self.globaltt['FuzzyPosition']])
                feature.addFeatureToGraph()

                # sometimes there's a peak marker, like a rsid.
                # we want to add that as a variant of the gene,
                # and xref it to the qtl.
                dbsnp_id = None
                if peak_mark != '' and peak_mark != '.' and \
                        re.match(r'rs', peak_mark.strip()):
                    dbsnp_id = 'dbSNP:'+peak_mark.strip()

                    model.addIndividualToGraph(
                        dbsnp_id, None,
                        self.globaltt['sequence_alteration'])
                    model.addXref(qtl_id, dbsnp_id)

                gene_id = gene_id.replace('uncharacterized ', '').strip()
                if gene_id is not None and gene_id != '' and gene_id != '.'\
                        and re.fullmatch(r'[^ ]*', gene_id) is not None:

                    # we assume if no src is provided and gene_id is an integer,
                    # then it is an NCBI gene ... (okay, lets crank that back a notch)
                    if gene_id_src == '' and gene_id.isdigit() and \
                            gene_id in self.gene_info:
                        # LOG.info(
                        #    'Warm & Fuzzy saying %s is a NCBI gene for %s',
                        #    gene_id, common_name)
                        gene_id_src = 'NCBIgene'
                    elif gene_id_src == '' and gene_id.isdigit():
                        LOG.warning(
                            'Cold & Prickely saying %s is a NCBI gene for %s',
                            gene_id, common_name)
                        gene_id_src = 'NCBIgene'
                    elif gene_id_src == '':
                        LOG.error(
                            ' "%s" is a NOT NCBI gene for %s', gene_id, common_name)
                        gene_id_src = None

                    if gene_id_src == 'NCBIgene':
                        gene_id = 'NCBIGene:' + gene_id
                        # we will expect that these will get labels elsewhere
                        geno.addGene(gene_id, None)
                        # FIXME what is the right relationship here?
                        geno.addAffectedLocus(qtl_id, gene_id)

                        if dbsnp_id is not None:
                            # add the rsid as a seq alt of the gene_id
开发者ID:TomConlin,项目名称:dipper,代码行数:70,代码来源:AnimalQTLdb.py

示例8: _process_genes

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _process_genes(self, taxid, limit=None):
        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph

        model = Model(graph)
        geno = Genotype(graph)

        raw = '/'.join((self.rawdir, self.files[taxid]['file']))
        line_counter = 0
        LOG.info("Processing Ensembl genes for tax %s", taxid)
        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t')
            for row in filereader:
                if len(row) < 4:
                    LOG.warning("Too few columns in: " + row)
                    raise ValueError("Data error for file %s", raw)
                (ensembl_gene_id, external_gene_name, description, gene_biotype,
                 entrezgene, ensembl_peptide_id, uniprotswissprot) = row[0:7]

                # in the case of human genes, we also get the hgnc id,
                # and is the last col
                if taxid == '9606':
                    hgnc_id = row[7]
                else:
                    hgnc_id = None

                if self.test_mode and entrezgene != '' and \
                        int(entrezgene) not in self.gene_ids:
                    continue

                line_counter += 1
                gene_id = 'ENSEMBL:' + ensembl_gene_id
                peptide_curie = 'ENSEMBL:{}'.format(ensembl_peptide_id)
                uniprot_curie = 'UniProtKB:{}'.format(uniprotswissprot)
                entrez_curie = 'NCBIGene:{}'.format(entrezgene)

                if description == '':
                    description = None
                gene_biotype = gene_biotype.strip()
                gene_type_id = self.resolve(gene_biotype, False)
                if gene_type_id == gene_biotype.strip():   # did not resolve
                    gene_type_id = self.globaltt['polypeptide']

                model.addClassToGraph(
                    gene_id, external_gene_name, gene_type_id, description)
                model.addIndividualToGraph(peptide_curie, None, gene_type_id)
                model.addIndividualToGraph(uniprot_curie, None, gene_type_id)

                if entrezgene != '':
                    if taxid == '9606':
                        # Use HGNC for eq in human data
                        model.addXref(gene_id, entrez_curie)
                    else:
                        model.addEquivalentClass(gene_id, entrez_curie)
                if hgnc_id is not None and hgnc_id != '':
                    model.addEquivalentClass(gene_id, hgnc_id)
                geno.addTaxon('NCBITaxon:'+taxid, gene_id)
                if ensembl_peptide_id != '':
                    geno.addGeneProduct(gene_id, peptide_curie)
                    if uniprotswissprot != '':
                        geno.addGeneProduct(gene_id, uniprot_curie)
                        model.addXref(peptide_curie, uniprot_curie)

                if not self.test_mode and limit is not None and line_counter > limit:
                    break

        return
开发者ID:TomConlin,项目名称:dipper,代码行数:71,代码来源:Ensembl.py

示例9: _process_ortholog_classes

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _process_ortholog_classes(self, limit=None):
        """
        This method add the KEGG orthology classes to the graph.

        If there's an embedded enzyme commission number,
        that is added as an xref.

        Triples created:
        <orthology_class_id> is a class
        <orthology_class_id> has label <orthology_symbols>
        <orthology_class_id> has description <orthology_description>
        :param limit:

        :return:
        """

        LOG.info("Processing ortholog classes")
        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        model = Model(graph)
        raw = '/'.join((self.rawdir, self.files['ortholog_classes']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in reader:
                (orthology_class_id, orthology_class_name) = row

                if self.test_mode and orthology_class_id \
                        not in self.test_ids['orthology_classes']:
                    continue

                # The orthology class is essentially a KEGG gene ID
                # that is species agnostic.
                # Add the ID and label as a gene family class

                other_labels = re.split(r'[;,]', orthology_class_name)
                # the first one is the label we'll use
                orthology_label = other_labels[0]

                orthology_class_id = 'KEGG-'+orthology_class_id.strip()

                orthology_type = self.globaltt['gene_family']
                model.addClassToGraph(
                    orthology_class_id, orthology_label, orthology_type)
                if len(other_labels) > 1:
                    # add the rest as synonyms
                    # todo skip the first
                    for s in other_labels:
                        model.addSynonym(orthology_class_id, s.strip())

                    # add the last one as the description
                    d = other_labels[len(other_labels)-1]
                    model.addDescription(orthology_class_id, d)

                    # add the enzyme commission number (EC:1.2.99.5)as an xref
                    # sometimes there's two, like [EC:1.3.5.1 1.3.5.4]
                    # can also have a dash, like EC:1.10.3.-
                    ec_matches = re.findall(r'((?:\d+|\.|-){5,7})', d)
                    if ec_matches is not None:
                        for ecm in ec_matches:
                            model.addXref(orthology_class_id, 'EC:' + ecm)

                if not self.test_mode and limit is not None and reader.line_num > limit:
                    break
        LOG.info("Done with ortholog classes")
开发者ID:TomConlin,项目名称:dipper,代码行数:68,代码来源:KEGG.py

示例10: _process_genes

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _process_genes(self, taxid, limit=None):
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        model = Model(g)
        geno = Genotype(g)

        raw = '/'.join((self.rawdir, self.files[taxid]['file']))
        line_counter = 0
        logger.info("Processing Ensembl genes for tax %s", taxid)
        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t')
            for row in filereader:
                if len(row) < 4:
                    raise ValueError("Data error for file %s", raw)
                (ensembl_gene_id, external_gene_name,
                 description, gene_biotype, entrezgene,
                 peptide_id, uniprot_swissprot) = row[0:7]

                # in the case of human genes, we also get the hgnc id,
                # and is the last col
                if taxid == '9606':
                    hgnc_id = row[7]
                else:
                    hgnc_id = None

                if self.testMode and entrezgene != '' \
                        and int(entrezgene) not in self.gene_ids:
                    continue

                line_counter += 1
                gene_id = 'ENSEMBL:' + ensembl_gene_id
                peptide_curie = 'ENSEMBL:{}'.format(peptide_id)
                uniprot_curie = 'UniProtKB:{}'.format(uniprot_swissprot)
                entrez_curie = 'NCBIGene:{}'.format(entrezgene)

                if description == '':
                    description = None
                # gene_type_id = self._get_gene_type(gene_biotype)
                gene_type_id = None
                model.addClassToGraph(
                    gene_id, external_gene_name, gene_type_id, description)
                model.addIndividualToGraph(peptide_curie, None, self._get_gene_type("polypeptide"))
                model.addIndividualToGraph(uniprot_curie, None, self._get_gene_type("polypeptide"))

                if entrezgene != '':
                    model.addEquivalentClass(gene_id, entrez_curie)
                if hgnc_id is not None and hgnc_id != '':
                    model.addEquivalentClass(gene_id, hgnc_id)
                geno.addTaxon('NCBITaxon:'+taxid, gene_id)
                if peptide_id != '':
                    geno.addGeneProduct(gene_id, peptide_curie)
                    if uniprot_swissprot != '':
                        geno.addGeneProduct(gene_id, uniprot_curie)
                        model.addXref(peptide_curie, uniprot_curie)

                if not self.testMode \
                        and limit is not None and line_counter > limit:
                    break

        return
开发者ID:kshefchek,项目名称:dipper,代码行数:65,代码来源:Ensembl.py

示例11: _process_trait_mappings

# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addXref [as 别名]
    def _process_trait_mappings(self, raw, limit=None):
        """
        This method mapps traits from/to ...

        Triples created:

        :param limit:
        :return:
        """
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        model = Model(g)

        # with open(raw, 'r') as csvfile:
        #     filereader = csv.reader(csvfile, delimiter=',')
        #     row_count = sum(1 for row in filereader)
        #     row_count = row_count - 1

        with open(raw, 'r') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            next(filereader, None)  # skip header line
            for row in filereader:
                line_counter += 1
                # need to skip the last line
                if len(row) < 8:
                    logger.info(
                        "skipping line %d: %s", line_counter, '\t'.join(row))
                    continue
                (vto_id, pto_id, cmo_id, ato_column, species, trait_class,
                 trait_type, qtl_count) = row

                ato_id = re.sub(r'ATO #', 'AQTLTrait:',
                                re.sub(r'\].*', '',
                                       re.sub(r'\[', '', ato_column)))
                ato_id = ato_id.strip()

                ato_label = re.sub(r'.*\]\s*', '', ato_column)

                # if species == 'Cattle':
                #     ato_id = re.sub(r'ATO:', 'AQTLTraitCattle:', ato_id)
                # elif species == 'Chicken':
                #     ato_id = re.sub(r'ATO:', 'AQTLTraitChicken:', ato_id)
                # elif species == 'Sheep':
                #     ato_id = re.sub(r'ATO:', 'AQTLTraitSheep:', ato_id)
                # elif species == 'Horse':
                #     ato_id = re.sub(r'ATO:', 'AQTLTraitHorse:', ato_id)
                # elif species == 'Pig':
                #     ato_id = re.sub(r'ATO:', 'AQTLTraitPig:', ato_id)
                # elif species == 'Rainbow trout':
                #     ato_id = re.sub(
                #       r'ATO:', 'AQTLTraitRainbowTrout:', ato_id)
                # else:
                #     logger.warning(
                #       'Unknown species %s foufnd in trait mapping file.',
                #       species)
                #     continue
                # print(ato_label)

                model.addClassToGraph(ato_id, ato_label.strip())

                if re.match(r'VT:.*', vto_id):
                    model.addClassToGraph(vto_id, None)
                    model.addEquivalentClass(ato_id, vto_id)
                if re.match(r'LPT:.*', pto_id):
                    model.addClassToGraph(pto_id, None)
                    model.addXref(ato_id, pto_id)
                if re.match(r'CMO:.*', cmo_id):
                    model.addClassToGraph(cmo_id, None)
                    model.addXref(ato_id, cmo_id)

        logger.info("Done with trait mappings")
        return
开发者ID:DoctorBud,项目名称:dipper,代码行数:77,代码来源:AnimalQTLdb.py


注:本文中的dipper.models.Model.Model.addXref方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。