当前位置: 首页>>代码示例>>Python>>正文


Python GraphUtils.addEquivalentClass方法代码示例

本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addEquivalentClass方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.addEquivalentClass方法的具体用法?Python GraphUtils.addEquivalentClass怎么用?Python GraphUtils.addEquivalentClass使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dipper.utils.GraphUtils.GraphUtils的用法示例。


在下文中一共展示了GraphUtils.addEquivalentClass方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _map_eom_terms

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _map_eom_terms(self, raw, limit=None):
        """
        Load the EOM-to-HP term mappings from a local tab-separated file.

        The first row is read and discarded as a header.  Every following
        row must split into exactly five tab-separated columns (morphology
        term id, morphology term label, HP id, HP label, notes); any other
        column count raises ValueError from the tuple unpacking.
        Underscores in the HP id are replaced by colons, and when the result
        contains "HP:" the following is added to self.graph:
            <hp id> as a class
            <eom id> owl:equivalentClass <hp id>
        Rows without such an HP id only produce a warning.

        :param raw: path of the tsv file to read
        :param limit: optional cap; reading stops after the first data row
            whose 1-based position exceeds it
        :return: None
        """

        graph_utils = GraphUtils(curie_map.get())

        with open(raw, 'r') as tsv:
            tsv.readline()  # the header row carries no data
            for rows_seen, record in enumerate(tsv, start=1):
                (eom_id, eom_label, hp_id, _hp_label, _notes) = record.split('\t')

                # The source file writes HP_0000001; CURIEs want HP:0000001.
                hp_id = re.sub('_', ':', hp_id)
                if not re.match(".*HP:.*", hp_id):
                    logger.warning('No matching HP term for %s', eom_label)
                else:
                    # declare the HP term, then tie the EOM term to it
                    graph_utils.addClassToGraph(self.graph, hp_id, None)
                    graph_utils.addEquivalentClass(self.graph, eom_id, hp_id)

                if limit is not None and rows_seen > limit:
                    break

        return
开发者ID:d3borah,项目名称:dipper,代码行数:36,代码来源:EOM.py

示例2: _get_process_allelic_variants

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _get_process_allelic_variants(self, entry, g):
        """
        Add the allelic variants listed in one OMIM entry to the graph.

        For each item of entry['allelicVariantList']:
          * status 'live': the variant is added as an allele of the entry's
            gene; every reference number cited in its text as "{<num>:" is
            resolved through the mapping returned by self._get_pubs() and
            linked with 'is_about'; dbSNP ids become equivalent individuals;
            ClinVar RCV accessions become xrefs; and a page link to the OMIM
            entry anchor is added.
          * status containing 'moved' (covers 'moved' and 'removed'): the
            variant is added as a deprecated individual, pointing at the
            'movedTo' id when one is given.
          * anything else is logged as an error.

        :param entry: dict for a single OMIM entry, or None (nothing is done)
        :param g: graph that receives the triples
        :return: None
        """
        gu = GraphUtils(curie_map.get())
        geno = Genotype(g)
        if entry is None:
            return

        entry_num = entry['mimNumber']

        # process the ref list just to get the pmids
        ref_to_pmid = self._get_pubs(entry, g)

        if 'allelicVariantList' not in entry:
            return

        for al in entry['allelicVariantList']:
            variant = al['allelicVariant']
            al_num = variant['number']
            al_id = 'OMIM:'+str(entry_num)+'.'+str(al_num).zfill(4)
            status = variant['status']

            if status == 'live':
                al_label = None
                al_description = None
                cited_refs = set()  # reference numbers mentioned in the text
                if 'mutations' in variant:
                    al_label = variant['mutations']
                if 'text' in variant:
                    al_description = variant['text']
                    # citations look like "{12:Author et al. (1999)}"
                    cited_refs = set(re.findall(r'\{(\d+)\:', al_description))

                geno.addAllele(al_id, al_label, geno.genoparts['variant_locus'], al_description)
                geno.addAlleleOfGene(al_id, 'OMIM:'+str(entry_num),
                                     geno.object_properties['is_sequence_variant_instance_of'])

                # look up the pubmed id in the list of references
                for r in cited_refs:
                    pmid = ref_to_pmid[int(r)]
                    gu.addTriple(g, pmid, gu.object_properties['is_about'], al_id)

                if 'dbSnps' in variant:
                    for dnum in variant['dbSnps'].split(','):
                        did = 'dbSNP:'+dnum.strip()
                        gu.addIndividualToGraph(g, did, None)
                        gu.addEquivalentClass(g, al_id, did)

                if 'clinvarAccessions' in variant:
                    # clinvarAccessions is triple-semicolon delimited,
                    # each item like RCV000020059;;1
                    for accession in variant['clinvarAccessions'].split(';;;'):
                        rcv = re.match(r'(RCV\d+)\;\;', accession)
                        if rcv is None:
                            # a malformed item must not abort the whole entry
                            logger.warning(
                                'Unparseable ClinVar accession %s for %s',
                                accession, al_id)
                            continue
                        gu.addXref(g, al_id, 'ClinVar:'+rcv.group(1))

                gu.addPage(g, al_id, "http://omim.org/entry/"+str(entry_num)+"#"+str(al_num).zfill(4))
            elif re.search('moved', status):
                # for both 'moved' and 'removed'
                moved_ids = None
                if 'movedTo' in variant:
                    moved_ids = ['OMIM:'+variant['movedTo']]
                gu.addDeprecatedIndividual(g, al_id, moved_ids)
            else:
                logger.error('Uncaught alleleic variant status %s', status)
        # end loop allelicVariantList

        return
开发者ID:d3borah,项目名称:dipper,代码行数:61,代码来源:OMIM.py

示例3: _process_genes

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _process_genes(self, taxid, limit=None):
        """
        Read the Ensembl gene table for one taxon and add its genes.

        Each row of the tab-separated file self.files[taxid]['file'] must
        provide, in order: ensembl gene id, external gene name, description,
        gene biotype and entrez gene id.  For taxid '9606' a sixth column
        with the HGNC id is also read.  Per row this adds:
            <ENSEMBL:id> as a class (label = external gene name)
            <ENSEMBL:id> equivalentClass <NCBIGene:id>   (when present)
            <ENSEMBL:id> equivalentClass <hgnc id>       (when present)
            the taxon of the gene
        A row with too few columns is logged as a data error and processing
        stops immediately (the property loading at the end is skipped too).

        :param taxid: NCBI taxon number as a string; key into self.files
        :param limit: optional cap on processed rows; ignored in test mode
        :return: None
        """
        gu = GraphUtils(curie_map.get())

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        geno = Genotype(g)

        # in the case of human genes, we also get the hgnc id,
        # and is the last col
        is_human = taxid == '9606'
        # The unpacking below needs 5 columns (6 for human); checking
        # against a smaller number lets short rows raise instead of being
        # reported as a data error.
        min_columns = 6 if is_human else 5

        raw = '/'.join((self.rawdir, self.files[taxid]['file']))
        line_counter = 0
        logger.info("Processing Ensembl genes for tax %s", taxid)
        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t')
            for row in filereader:
                if len(row) < min_columns:
                    logger.error("Data error for file %s", raw)
                    return
                (ensembl_gene_id, external_gene_name, description,
                 gene_biotype, entrezgene) = row[0:5]

                hgnc_id = row[5] if is_human else None

                if self.testMode and entrezgene != '' \
                        and int(entrezgene) not in self.gene_ids:
                    continue

                line_counter += 1
                gene_id = 'ENSEMBL:'+ensembl_gene_id
                if description == '':
                    description = None
                # NOTE(review): the original code maps the biotype and then
                # immediately overwrites the result with None, so genes are
                # added without a type.  The call is kept in case it has
                # side effects; confirm whether the override is intentional.
                self._get_gene_type(gene_biotype)
                gene_type_id = None
                gu.addClassToGraph(
                    g, gene_id, external_gene_name, gene_type_id, description)

                if entrezgene != '':
                    gu.addEquivalentClass(g, gene_id, 'NCBIGene:'+entrezgene)
                if hgnc_id is not None and hgnc_id != '':
                    gu.addEquivalentClass(g, gene_id, hgnc_id)
                geno.addTaxon('NCBITaxon:'+taxid, gene_id)

                if not self.testMode \
                        and limit is not None and line_counter > limit:
                    break

        gu.loadProperties(g, Feature.object_properties, gu.OBJPROP)
        gu.loadProperties(g, Feature.data_properties, gu.DATAPROP)
        gu.loadProperties(g, Genotype.object_properties, gu.OBJPROP)
        gu.loadAllProperties(g)

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:60,代码来源:Ensembl.py

示例4: _process_genes_kegg2ncbi

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _process_genes_kegg2ncbi(self, limit=None):
        """
        Link each KEGG gene id in the 'ncbi' file to its NCBI Gene id.

        Rows of the tab-separated file must hold exactly three columns:
        kegg gene id, ncbi gene id, link type (the link type is not used).

        Triples created:
        <kegg_gene_id> is a class
        <ncbi_gene_id> is a class
        <kegg_gene_id> equivalentClass <ncbi_gene_id>

        In test mode only genes listed in self.test_ids['genes'] are kept
        and the limit is ignored.

        :param limit: optional cap on the number of rows read
        :return: None

        """

        logger.info("Processing KEGG gene IDs to NCBI gene IDs")
        g = self.testgraph if self.testMode else self.graph

        gu = GraphUtils(curie_map.get())
        raw = '/'.join((self.rawdir, self.files['ncbi']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            rows = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            # rows_read counts every row, including those filtered out below
            for rows_read, row in enumerate(rows, start=1):
                (kegg_gene_id, ncbi_gene_id, link_type) = row

                if self.testMode and \
                        kegg_gene_id not in self.test_ids['genes']:
                    continue

                # Swap the source prefix for the curie prefix in use here.
                ncbi_curie = ncbi_gene_id.replace('ncbi-geneid', 'NCBIGene')
                kegg_curie = 'KEGG-'+kegg_gene_id

                # Declaring the KEGG gene here is redundant unless this
                # table mentions genes that the genes table does not.
                gu.addClassToGraph(g, kegg_curie, None)
                gu.addClassToGraph(g, ncbi_curie, None)
                gu.addEquivalentClass(g, kegg_curie, ncbi_curie)

                over_limit = limit is not None and rows_read > limit
                if not self.testMode and over_limit:
                    break

        logger.info("Done with KEGG gene IDs to NCBI gene IDs")
        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:52,代码来源:KEGG.py

示例5: _process_pathway_pathway

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _process_pathway_pathway(self, limit):
        """
        Declare equivalences between paired KEGG pathway identifiers.

        Pathways come with both "map" and "ko" identifiers.  Each row of the
        'pathway_pathway' file must hold exactly two tab-separated ids; both
        are prefixed with 'KEGG-' and, when they differ, an equivalentClass
        axiom is added between them.

        In test mode only rows whose first id is in
        self.test_ids['pathway'] are kept and the limit is ignored.

        :param limit: cap on the number of rows read, or None for no cap
        :return: None

        """
        logger.info("Processing KEGG pathways to other ids")
        g = self.testgraph if self.testMode else self.graph

        gu = GraphUtils(curie_map.get())
        raw = '/'.join((self.rawdir, self.files['pathway_pathway']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            rows = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            # rows_read counts every row, including those filtered out below
            for rows_read, row in enumerate(rows, start=1):
                (first_id, second_id) = row

                if self.testMode and \
                        first_id not in self.test_ids['pathway']:
                    continue

                # results look like KEGG-path:map04130 or KEGG-path:ko04130
                first_curie = 'KEGG-'+first_id
                second_curie = 'KEGG-'+second_id

                if first_curie != second_curie:
                    gu.addEquivalentClass(g, first_curie, second_curie)

                over_limit = limit is not None and rows_read > limit
                if not self.testMode and over_limit:
                    break

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:41,代码来源:KEGG.py

示例6: _get_mappedids

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _get_mappedids(self, entry, g):
        """
        Add the external id mappings found in entry['externalLinks'].

        * Orphanet diseases and UMLS ids are added as classes and attached
          to the OMIM id as xrefs.
        * When self._get_omimtype(entry) reports a gene, the NCBI gene ids
          are attached as equivalent classes.

        Ids are stripped of surrounding whitespace and empty list items are
        skipped, so that no CURIE with blanks or without a local id is
        written.

        :param entry: dict for a single OMIM entry; must hold 'mimNumber'
        :param g: graph that receives the triples
        :return: None
        """
        gu = GraphUtils(curie_map.get())
        omimid = 'OMIM:'+str(entry['mimNumber'])
        if 'externalLinks' not in entry:
            return
        links = entry['externalLinks']

        if 'orphanetDiseases' in links:
            # triple semi-colon delimited list of
            # double semi-colon delimited orphanet ID/disease pairs
            # 2970;;566;;Prune belly syndrome
            for item in links['orphanetDiseases'].split(';;;'):
                (orpha_num, _internal_num, orpha_label) = item.split(';;')
                orpha_id = 'Orphanet:'+orpha_num.strip()
                gu.addClassToGraph(g, orpha_id, orpha_label.strip())
                gu.addXref(g, omimid, orpha_id)

        if 'umlsIDs' in links:
            for umls_num in links['umlsIDs'].split(','):
                umls_num = umls_num.strip()
                if umls_num == '':
                    continue
                umls_id = 'UMLS:'+umls_num
                gu.addClassToGraph(g, umls_id, None)
                gu.addXref(g, omimid, umls_id)

        if self._get_omimtype(entry) == Genotype.genoparts['gene'] and 'geneIDs' in links:
            # str() tolerates a single id delivered as a number
            for gene_num in str(links['geneIDs']).split(','):
                gene_num = gene_num.strip()
                if gene_num == '':
                    continue
                gu.addEquivalentClass(g, omimid, 'NCBIGene:'+gene_num)

        return
开发者ID:d3borah,项目名称:dipper,代码行数:38,代码来源:OMIM.py

示例7: _process_diseasegene

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]

#.........这里部分代码省略.........
                # get the element name and id
                # id = elem.get('id') # some internal identifier
                disorder_num = elem.find("OrphaNumber").text

                disorder_id = "Orphanet:" + str(disorder_num)

                if self.testMode and disorder_id not in config.get_config()["test_ids"]["disease"]:
                    continue

                disorder_label = elem.find("Name").text

                # make a hash of internal gene id to type for later lookup
                gene_iid_to_type = {}
                gene_list = elem.find("GeneList")
                for gene in gene_list.findall("Gene"):
                    gene_iid = gene.get("id")
                    gene_type = gene.find("GeneType").get("id")
                    gene_iid_to_type[gene_iid] = gene_type

                gu.addClassToGraph(g, disorder_id, disorder_label)  # assuming that these are in the ontology

                assoc_list = elem.find("DisorderGeneAssociationList")
                for a in assoc_list.findall("DisorderGeneAssociation"):
                    gene_iid = a.find(".//Gene").get("id")
                    gene_name = a.find(".//Gene/Name").text
                    gene_symbol = a.find(".//Gene/Symbol").text
                    gene_num = a.find("./Gene/OrphaNumber").text
                    gene_id = "Orphanet:" + str(gene_num)
                    gene_type_id = self._map_gene_type_id(gene_iid_to_type[gene_iid])
                    gu.addClassToGraph(g, gene_id, gene_symbol, gene_type_id, gene_name)
                    syn_list = a.find("./Gene/SynonymList")
                    if int(syn_list.get("count")) > 0:
                        for s in syn_list.findall("./Synonym"):
                            gu.addSynonym(g, gene_id, s.text)

                    dgtype = a.find("DisorderGeneAssociationType").get("id")
                    rel_id = self._map_rel_id(dgtype)
                    dg_label = a.find("./DisorderGeneAssociationType/Name").text
                    if rel_id is None:
                        logger.warn(
                            "Cannot map association type (%s) to RO for association (%s | %s).  Skipping.",
                            dg_label,
                            disorder_label,
                            gene_symbol,
                        )
                        continue

                    alt_locus_id = "_" + gene_num + "-" + disorder_num + "VL"
                    alt_label = " ".join(
                        ("some variant of", gene_symbol.strip(), "that is a", dg_label.lower(), disorder_label)
                    )
                    if self.nobnodes:
                        alt_locus_id = ":" + alt_locus_id
                    gu.addIndividualToGraph(g, alt_locus_id, alt_label, geno.genoparts["variant_locus"])
                    geno.addAlleleOfGene(alt_locus_id, gene_id)

                    # consider typing the gain/loss-of-function variants like:
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002054
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002053

                    # use "assessed" status to issue an evidence code
                    # FIXME I think that these codes are sub-optimal
                    status_code = a.find("DisorderGeneAssociationStatus").get("id")
                    eco_id = "ECO:0000323"  # imported automatically asserted information used in automatic assertion
                    if status_code == "17991":  # Assessed  # TODO are these internal ids stable between releases?
                        eco_id = "ECO:0000322"  # imported manually asserted information used in automatic assertion
                    # Non-traceable author statement ECO_0000034
                    # imported information in automatic assertion ECO_0000313

                    assoc = G2PAssoc(self.name, alt_locus_id, disorder_id, rel_id)
                    assoc.add_evidence(eco_id)
                    assoc.add_association_to_graph(g)

                    rlist = a.find("./Gene/ExternalReferenceList")
                    eqid = None

                    for r in rlist.findall("ExternalReference"):
                        if r.find("Source").text == "Ensembl":
                            eqid = "ENSEMBL:" + r.find("Reference").text
                        elif r.find("Source").text == "HGNC":
                            eqid = "HGNC:" + r.find("Reference").text
                        elif r.find("Source").text == "OMIM":
                            eqid = "OMIM:" + r.find("Reference").text
                        else:
                            pass  # skip the others for now
                        if eqid is not None:
                            gu.addClassToGraph(g, eqid, None)
                            gu.addEquivalentClass(g, gene_id, eqid)
                            pass
                elem.clear()  # discard the element

            if self.testMode and limit is not None and line_counter > limit:
                return

        gu.loadProperties(g, G2PAssoc.annotation_properties, G2PAssoc.ANNOTPROP)
        gu.loadProperties(g, G2PAssoc.datatype_properties, G2PAssoc.DATAPROP)
        gu.loadProperties(g, G2PAssoc.object_properties, G2PAssoc.OBJECTPROP)
        gu.loadAllProperties(g)

        return
开发者ID:d3borah,项目名称:dipper,代码行数:104,代码来源:Orphanet.py

示例8: _get_gene_info

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _get_gene_info(self, limit):
        """
        Currently loops through the gene_info file and creates the genes as classes, typed with SO.  It will add their
        label, any alternate labels as synonyms, alternate ids as equivlaent classes.  HPRDs get added as
        protein products.  The chromosome and chr band get added as blank node regions, and the gene is faldo:located
        on the chr band.
        :param limit:
        :return:
        """
        gu = GraphUtils(curie_map.get())

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        geno = Genotype(g)

        # not unzipping the file
        logger.info("Processing Gene records")
        line_counter = 0
        myfile = '/'.join((self.rawdir, self.files['gene_info']['file']))
        logger.info("FILE: %s", myfile)

        # Add taxa and genome classes for those in our filter
        for tax_num in self.tax_ids:
            tax_id = ':'.join(('NCBITaxon', str(tax_num)))
            geno.addGenome(tax_id, str(tax_num))   # tax label can get added elsewhere
            gu.addClassToGraph(g, tax_id, None)   # label added elsewhere
        with gzip.open(myfile, 'rb') as f:
            for line in f:
                # skip comments
                line = line.decode().strip()
                if re.match('^#', line):
                    continue
                (tax_num, gene_num, symbol, locustag,
                 synonyms, xrefs, chr, map_loc, desc,
                 gtype, authority_symbol, name,
                 nomenclature_status, other_designations, modification_date) = line.split('\t')

                ##### set filter=None in init if you don't want to have a filter
                #if self.filter is not None:
                #    if ((self.filter == 'taxids' and (int(tax_num) not in self.tax_ids))
                #            or (self.filter == 'geneids' and (int(gene_num) not in self.gene_ids))):
                #        continue
                ##### end filter

                if self.testMode and int(gene_num) not in self.gene_ids:
                    continue

                if int(tax_num) not in self.tax_ids:
                    continue

                line_counter += 1

                gene_id = ':'.join(('NCBIGene', gene_num))
                tax_id = ':'.join(('NCBITaxon', tax_num))
                gene_type_id = self._map_type_of_gene(gtype)

                if symbol == 'NEWENTRY':
                    label = None
                else:
                    label = symbol

                # TODO might have to figure out if things aren't genes, and make them individuals
                gu.addClassToGraph(g, gene_id, label, gene_type_id, desc)

                # we have to do special things here for genes, because they're classes not individuals
                # f = Feature(gene_id,label,gene_type_id,desc)

                if name != '-':
                    gu.addSynonym(g, gene_id, name)
                if synonyms.strip() != '-':
                    for s in synonyms.split('|'):
                        gu.addSynonym(g, gene_id, s.strip(), Assoc.annotation_properties['hasRelatedSynonym'])
                if other_designations.strip() != '-':
                    for s in other_designations.split('|'):
                        gu.addSynonym(g, gene_id, s.strip(), Assoc.annotation_properties['hasRelatedSynonym'])

                # deal with the xrefs
                # MIM:614444|HGNC:HGNC:16851|Ensembl:ENSG00000136828|HPRD:11479|Vega:OTTHUMG00000020696
                if xrefs.strip() != '-':
                    for r in xrefs.strip().split('|'):
                        fixedr = self._cleanup_id(r)
                        if fixedr is not None and fixedr.strip() != '':
                            if re.match('HPRD', fixedr):
                                # proteins are not == genes.
                                gu.addTriple(g, gene_id, self.properties['has_gene_product'], fixedr)
                            else:
                                # skip some of these for now
                                if fixedr.split(':')[0] not in ['Vega', 'IMGT/GENE-DB']:
                                    gu.addEquivalentClass(g, gene_id, fixedr)

                # edge cases of id | symbol | chr | map_loc:
                # 263     AMD1P2    X|Y  with   Xq28 and Yq12
                # 438     ASMT      X|Y  with   Xp22.3 or Yp11.3    # in PAR
                # 419     ART3      4    with   4q21.1|4p15.1-p14   # no idea why there's two bands listed - possibly 2 assemblies
                # 28227   PPP2R3B   X|Y  Xp22.33; Yp11.3            # in PAR
                # 619538  OMS     10|19|3 10q26.3;19q13.42-q13.43;3p25.3   #this is of "unknown" type == susceptibility
                # 101928066       LOC101928066    1|Un    -         # unlocated scaffold
#.........这里部分代码省略.........
开发者ID:d3borah,项目名称:dipper,代码行数:103,代码来源:NCBIGene.py

示例9: _get_identifiers

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _get_identifiers(self, limit):
        """
        This will process the id mapping file provided by Biogrid.
        The file has a very large header, which we scan past,
        then pull the identifiers, and make equivalence axioms.

        The first member of the zip archive is taken to be the mapping
        table.  Everything up to and including the line that starts with
        BIOGRID_ID is skipped; every later line must split into four
        tab-separated fields (biogrid id, identifier value, identifier type,
        organism name).  Only rows for the organisms in the species filter
        are used:
            <BIOGRID:id> equivalentClass <prefix:value>  for accepted id types
            <BIOGRID:id> is a class labelled with its OFFICIAL_SYMBOL

        The archive is opened with a context manager so it is closed even
        when a malformed row raises.

        :param limit: cap on the number of species-matching rows processed,
            or None for no cap; ignored in test mode
        :return: None

        """

        logger.info("getting identifier mapping")
        line_counter = 0
        f = '/'.join((self.rawdir, self.files['identifiers']['file']))
        foundheader = False

        gu = GraphUtils(curie_map.get())

        # TODO align this species filter with the one above
        # speciesfilters = 'Homo sapiens,Mus musculus,Drosophila melanogaster,
        # Danio rerio, Caenorhabditis elegans,Xenopus laevis'.split(',')
        speciesfilters = 'Homo sapiens,Mus musculus'.split(',')

        # TODO make these filters available as commandline options
        # geneidtypefilters='NCBIGene,OMIM,MGI,FlyBase,ZFIN,MGI,HGNC,
        #                   WormBase,XenBase,ENSEMBL,miRBase'.split(',')
        geneidtypefilters = 'NCBIGene,MGI,ENSEMBL,ZFIN,HGNC'.split(',')
        # proteinidtypefilters='HPRD,Swiss-Prot,NCBIProtein'

        # the target graph does not change from row to row
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        with ZipFile(f, 'r') as myzip:
            # assume that the first entry is the item
            fname = myzip.namelist()[0]
            with myzip.open(fname, 'r') as csvfile:
                for line in csvfile:
                    line = line.decode()
                    # skip header lines
                    if not foundheader:
                        if re.match(r'BIOGRID_ID', line):
                            foundheader = True
                        continue

                    line = line.strip()
                    # BIOGRID_ID
                    # IDENTIFIER_VALUE
                    # IDENTIFIER_TYPE
                    # ORGANISM_OFFICIAL_NAME
                    # 1	814566	ENTREZ_GENE	Arabidopsis thaliana
                    (biogrid_num, id_num, id_type,
                     organism_label) = line.split('\t')

                    # skip any genes that don't match our test set
                    if self.testMode \
                            and int(biogrid_num) not in self.biogrid_ids:
                        continue

                    # for each one of these,
                    # create the node and add equivalent classes
                    biogrid_id = 'BIOGRID:'+biogrid_num
                    prefix = self._map_idtype_to_prefix(id_type)

                    if organism_label.strip() in speciesfilters:
                        line_counter += 1
                        if prefix in geneidtypefilters:
                            mapped_id = ':'.join((prefix, id_num))
                            gu.addEquivalentClass(g, biogrid_id, mapped_id)
                        # this symbol will only get attached
                        # to the biogrid class
                        elif id_type == 'OFFICIAL_SYMBOL':
                            gu.addClassToGraph(g, biogrid_id, id_num)
                        # elif (id_type == 'SYNONYM'):
                        #   FIXME - i am not sure these are synonyms, altids?
                        #   gu.addSynonym(g,biogrid_id,id_num)

                    if not self.testMode and limit is not None \
                            and line_counter > limit:
                        break

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:84,代码来源:BioGrid.py

示例10: _process_omim2disease

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _process_omim2disease(self, limit=None):
        """
        Map KEGG disease ids to OMIM disease ids where the mapping is 1:1.

        Pass one reads the 'omim2disease' file (three tab-separated columns:
        omim id, kegg id, link type) and records every pairing in both
        self.omim_disease_hash and self.kegg_disease_hash.  Pass two walks
        the OMIM ids and, for each one that has exactly one KEGG partner
        which in turn has exactly one OMIM partner, adds:

        Triples created:
        <kegg_disease_id> is a class
        <omim_disease_id> is a class
        <kegg_disease_id> equivalentClass <omim_disease_id>

        In test mode only OMIM ids in self.test_ids['disease'] are
        considered and the limit is ignored.

        :param limit: optional cap on the number of OMIM ids examined
        :return: None
        """

        logger.info("Processing 1:1 KEGG disease to OMIM disease mappings")
        g = self.testgraph if self.testMode else self.graph
        gu = GraphUtils(curie_map.get())
        raw = '/'.join((self.rawdir, self.files['omim2disease']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            for row in csv.reader(csvfile, delimiter='\t', quotechar='\"'):
                (omim_raw, kegg_raw, link_type) = row

                kegg_curie = 'KEGG-'+kegg_raw.strip()
                omim_curie = omim_raw.replace('omim', 'OMIM')

                # record the link in both directions:
                # OMIM ID -> KEGG IDs and KEGG ID -> OMIM IDs
                self.omim_disease_hash.setdefault(omim_curie, []).append(kegg_curie)
                self.kegg_disease_hash.setdefault(kegg_curie, []).append(omim_curie)

        # Second pass: keep only the pairs that are unique in both directions.
        examined = 0
        for omim_disease_id in self.omim_disease_hash:
            if self.testMode and omim_disease_id not in self.test_ids['disease']:
                continue

            if (not self.testMode) and (limit is not None and examined > limit):
                break
            examined += 1

            kegg_partners = self.omim_disease_hash[omim_disease_id]
            if len(kegg_partners) != 1:
                continue
            kegg_disease_id = kegg_partners[0]
            if len(self.kegg_disease_hash[kegg_disease_id]) != 1:
                continue

            # add ids, and deal with the labels separately
            gu.addClassToGraph(g, kegg_disease_id, None)
            gu.addClassToGraph(g, omim_disease_id, None)
            gu.addEquivalentClass(g, kegg_disease_id, omim_disease_id)  # safe?
            # gu.addXref(g, kegg_disease_id, omim_disease_id)

        logger.info("Done with KEGG disease to OMIM disease mappings.")
        return
开发者ID:d3borah,项目名称:dipper,代码行数:63,代码来源:KEGG.py

示例11: _process_omim2gene

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _process_omim2gene(self, limit=None):
        """
        Map KEGG gene IDs to OMIM IDs, branching on the link_type column
        of the tab-delimited 'omim2gene' file.

        * 'equivalent' links: both ids are treated as genes and the KEGG
          gene is declared an equivalent class of the OMIM id.
        * 'reverse' links: a variant locus of the KEGG gene is created and
          associated with the OMIM id using the 'is_marker_for' relation.
        * 'original' links are only logged (they are sometimes a gene and
          sometimes a disease) and otherwise skipped.
        * any other link type is logged as a warning and skipped.

        Triples created:
        <omim_gene_id> is a class
        <kegg_gene_id> is a Gene
        <kegg_gene_id> equivalentClass <omim_gene_id>

        <alt_locus_id> is a variant_locus individual
        <alt_locus_id> is an allele of <kegg_gene_id>
        <assoc_id> associates <alt_locus_id> with <omim_id> (is_marker_for)

        :param limit: when not in test mode, stop reading once the row
            counter exceeds this value; None means read the whole file
        :return:
        """

        logger.info("Processing OMIM to KEGG gene")
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        geno = Genotype(g)
        gu = GraphUtils(curie_map.get())
        raw = '/'.join((self.rawdir, self.files['omim2gene']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                (kegg_gene_id, omim_id, link_type) = row

                # the test filter is applied to the raw id from the file,
                # i.e. before the 'KEGG-' prefix is added below
                if self.testMode and kegg_gene_id not in self.test_ids['genes']:
                    continue

                kegg_gene_id = 'KEGG-'+kegg_gene_id.strip()
                omim_id = re.sub('omim', 'OMIM', omim_id)
                if link_type == 'equivalent':
                    # these are genes!  so add them as a class then make equivalence
                    gu.addClassToGraph(g, omim_id, None)
                    geno.addGene(kegg_gene_id, None)
                    gu.addEquivalentClass(g, kegg_gene_id, omim_id)
                elif link_type == 'reverse':
                    # make an association between an OMIM ID and the KEGG gene ID
                    # we do this with omim ids because they are more atomic than KEGG ids

                    alt_locus_id = self._make_variant_locus_id(kegg_gene_id, omim_id)
                    # NOTE(review): assumes label_hash already has an entry
                    # for this locus id (presumably filled in by
                    # _make_variant_locus_id) -- a missing key raises KeyError
                    alt_label = self.label_hash[alt_locus_id]
                    gu.addIndividualToGraph(g, alt_locus_id, alt_label, geno.genoparts['variant_locus'])
                    geno.addAlleleOfGene(alt_locus_id, kegg_gene_id)

                    # Add the disease to gene relationship.
                    rel = gu.object_properties['is_marker_for']
                    assoc = G2PAssoc(self.name, alt_locus_id, omim_id, rel)
                    assoc.add_association_to_graph(g)

                elif link_type == 'original':
                    # these are sometimes a gene, and sometimes a disease
                    logger.info('Unable to handle original link for %s-%s', kegg_gene_id, omim_id)
                else:
                    # don't know what these are
                    # (Logger.warn is a deprecated alias; use warning)
                    logger.warning('Unhandled link type for %s-%s: %s', kegg_gene_id, omim_id, link_type)

                if (not self.testMode) and (limit is not None and line_counter > limit):
                    break

        logger.info("Done with OMIM to KEGG gene")
        gu.loadProperties(g, G2PAssoc.annotation_properties, G2PAssoc.ANNOTPROP)
        gu.loadProperties(g, G2PAssoc.datatype_properties, G2PAssoc.DATAPROP)
        gu.loadProperties(g, G2PAssoc.object_properties, G2PAssoc.OBJECTPROP)

        return
开发者ID:d3borah,项目名称:dipper,代码行数:75,代码来源:KEGG.py

示例12: _process_genes

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _process_genes(self, limit=None):
        """
        Read the tab-delimited HGNC 'genes' file and add each gene to the
        graph.

        For every non-header row this:
        * adds the HGNC id as a class (label = symbol, typed from the
          locus_type, described by the name), and marks it deprecated when
          the locus_type is 'withdrawn';
        * declares it equivalent to its NCBIGene and ENSEMBL ids when
          those columns are populated;
        * attaches the human taxon (NCBITaxon:9606);
        * adds a <PMID> is_about <hgnc_id> triple per listed publication;
        * makes the gene a subsequence of its chromosome band when one
          can be parsed from the location column, or of the whole
          chromosome when only a chromosome is given.

        :param limit: when not in test mode, stop reading once the row
            counter (which includes the header row) exceeds this value;
            None means read the whole file
        :return:
        """
        gu = GraphUtils(curie_map.get())

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        geno = Genotype(g)

        raw = '/'.join((self.rawdir, self.files['genes']['file']))
        line_counter = 0
        logger.info("Processing HGNC genes")

        # Location parsing patterns, compiled once outside the row loop.
        # A chromosome name must be followed by an arm (p/q) or by the end
        # of the string.  The previous pattern used '[pq$]', but inside a
        # character set '$' is a literal dollar sign, so arm-less locations
        # (e.g. 'X') never matched and the chromosome-only branch below
        # could not be reached.
        chr_regex = re.compile(r'(\d+|X|Y|Z|W|MT)(?:[pq]|$)')
        band_regex = re.compile(r'([pq][A-H\d]?\d?(?:\.\d+)?)')

        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                # the unpacking doubles as a column-count check:
                # a row with a different width raises ValueError
                (hgnc_id, symbol, name, locus_group, locus_type, status,
                 location, location_sortable, alias_symbol, alias_name,
                 prev_symbol, prev_name, gene_family, gene_family_id,
                 date_approved_reserved, date_symbol_changed,
                 date_name_changed, date_modified, entrez_id, ensembl_gene_id,
                 vega_id, ucsc_id, ena, refseq_accession, ccds_id, uniprot_ids,
                 pubmed_id, mgd_id, rgd_id, lsdb, cosmic, omim_id, mirbase,
                 homeodb, snornabase, bioparadigms_slc, orphanet,
                 pseudogene_org, horde_id, merops, imgt, iuphar,
                 kznf_gene_catalog, mamit_trnadb, cd, lncrnadb, enzyme_id,
                 intermediate_filament_db) = row

                line_counter += 1

                # skip header
                if line_counter <= 1:
                    continue

                # in test mode only keep genes from the test set; rows
                # without an entrez id are not filtered out here
                if self.testMode and entrez_id != '' \
                        and int(entrez_id) not in self.gene_ids:
                    continue

                if name == '':
                    name = None
                gene_type_id = self._get_gene_type(locus_type)
                gu.addClassToGraph(g, hgnc_id, symbol, gene_type_id, name)
                if locus_type == 'withdrawn':
                    gu.addDeprecatedClass(g, hgnc_id)
                if entrez_id != '':
                    gu.addEquivalentClass(
                        g, hgnc_id, 'NCBIGene:' + entrez_id)
                if ensembl_gene_id != '':
                    gu.addEquivalentClass(
                        g, hgnc_id, 'ENSEMBL:' + ensembl_gene_id)
                geno.addTaxon('NCBITaxon:9606', hgnc_id)

                # add pubs as "is about"
                if pubmed_id != '':
                    for p in pubmed_id.strip().split('|'):
                        # strip before the emptiness test so that a
                        # whitespace-only entry cannot yield a bare 'PMID:'
                        pmid = p.strip()
                        if pmid != '':
                            gu.addTriple(
                                g, 'PMID:' + pmid,
                                gu.object_properties['is_about'], hgnc_id)

                # add chr location
                # sometimes two are listed, like: 10p11.2 or 17q25
                # -- there are only 2 of these FRA10A and MPFD
                # sometimes listed like "1 not on reference assembly"
                # sometimes listed like 10q24.1-q24.3
                # sometimes like 11q11 alternate reference locus
                chr_match = chr_regex.match(location)
                if chr_match is not None:
                    chrom = chr_match.group(1)
                    chrom_id = makeChromID(chrom, 'NCBITaxon:9606', 'CHR')
                    band_match = band_regex.search(location)
                    f = Feature(hgnc_id, None, None)
                    if band_match is not None:
                        band = chrom + band_match.group(1)
                        # add the chr band as the parent to this gene
                        # as a feature but assume that the band is created
                        # as a class with properties elsewhere in Monochrom
                        # TEC Monoch? Monarchdom??
                        band_id = makeChromID(band, 'NCBITaxon:9606', 'CHR')
                        gu.addClassToGraph(g, band_id, None)
                        f.addSubsequenceOfFeature(g, band_id)
                    else:
                        # chromosome known but no band: attach the gene
                        # to the whole chromosome
                        gu.addClassToGraph(g, chrom_id, None)
                        f.addSubsequenceOfFeature(g, chrom_id)

                if not self.testMode \
                        and limit is not None and line_counter > limit:
                    break

            # end loop through file

        gu.loadProperties(g, Feature.object_properties, gu.OBJPROP)
        gu.loadProperties(g, Feature.data_properties, gu.DATAPROP)
        gu.loadProperties(g, Genotype.object_properties, gu.OBJPROP)
        gu.loadAllProperties(g)
#.........这里部分代码省略.........
开发者ID:JervenBolleman,项目名称:dipper,代码行数:103,代码来源:HGNC.py

示例13: _process_trait_mappings

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]
    def _process_trait_mappings(self, raw, limit=None):
        """
        Read the comma-separated trait mapping file and add each trait,
        together with its mappings to other trait vocabularies, to the graph.

        Every row with at least 8 columns yields a trait class whose id and
        label are both parsed from the fourth column (presumably of the
        form '[ATO #<number>] <label>' -- verify against the data file).

        Triples created:
        <ato_id> is a class, labelled with the trait label
        <ato_id> equivalentClass <vto_id>   (when the id starts with 'VT:')
        <ato_id> equivalentClass <pto_id>   (when the id starts with 'PT:')
        <ato_id> hasXref <cmo_id>           (when the id starts with 'CMO:')

        :param raw: path of the trait mapping csv file
        :param limit: accepted for signature parity with the other
            processing methods; it is not applied here, the whole file
            is always read
        :return:
        """
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0

        gu = GraphUtils(curie_map.get())

        with open(raw, 'r') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            next(filereader, None)  # skip header line
            for row in filereader:
                line_counter += 1
                # need to skip the last line
                if len(row) < 8:
                    logger.info("skipping line %d: %s", line_counter, '\t'.join(row))
                    continue
                # only the first four columns are used below; unpacking all
                # eight still enforces the expected column count
                (vto_id, pto_id, cmo_id, ato_column, species, trait_class, trait_type, qtl_count) = row

                # Raw strings are used for the patterns: '\]' and '\[' are
                # invalid escape sequences in ordinary string literals.
                # id: drop every '[', cut from the first ']' onwards, then
                # swap the 'ATO #' prefix for the AQTLTrait curie prefix
                ato_id = re.sub(r'\[', '', ato_column)
                ato_id = re.sub(r'\].*', '', ato_id)
                ato_id = re.sub('ATO #', 'AQTLTrait:', ato_id)

                # label: whatever follows the last ']' (plus whitespace)
                ato_label = re.sub(r'.*\]\s*', '', ato_column)

                # NOTE: species-specific prefixes (AQTLTraitCattle:, ...)
                # are not applied; every species shares 'AQTLTrait:'
                gu.addClassToGraph(g, ato_id, ato_label.strip())

                if re.match('VT:.*', vto_id):
                    gu.addClassToGraph(g, vto_id, None)
                    gu.addEquivalentClass(g, ato_id, vto_id)
                if re.match('PT:.*', pto_id):
                    gu.addClassToGraph(g, pto_id, None)
                    gu.addEquivalentClass(g, ato_id, pto_id)
                if re.match('CMO:.*', cmo_id):
                    gu.addClassToGraph(g, cmo_id, None)
                    gu.addXref(g, ato_id, cmo_id)

        logger.info("Done with trait mappings")
        return
开发者ID:d3borah,项目名称:dipper,代码行数:69,代码来源:AnimalQTLdb.py

示例14: _process_diseasegene

# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addEquivalentClass [as 别名]

#.........这里部分代码省略.........
                gene_iid_to_type = {}
                gene_list = elem.find('GeneList')
                for gene in gene_list.findall('Gene'):
                    gene_iid = gene.get('id')
                    gene_type = gene.find('GeneType').get('id')
                    gene_iid_to_type[gene_iid] = gene_type

                # assuming that these are in the ontology
                gu.addClassToGraph(g, disorder_id, disorder_label)

                assoc_list = elem.find('DisorderGeneAssociationList')
                for a in assoc_list.findall('DisorderGeneAssociation'):
                    gene_iid = a.find('.//Gene').get('id')
                    gene_name = a.find('.//Gene/Name').text
                    gene_symbol = a.find('.//Gene/Symbol').text
                    gene_num = a.find('./Gene/OrphaNumber').text
                    gene_id = 'Orphanet:'+str(gene_num)
                    gene_type_id = \
                        self._map_gene_type_id(gene_iid_to_type[gene_iid])
                    gu.addClassToGraph(
                        g, gene_id, gene_symbol, gene_type_id, gene_name)
                    syn_list = a.find('./Gene/SynonymList')
                    if int(syn_list.get('count')) > 0:
                        for s in syn_list.findall('./Synonym'):
                            gu.addSynonym(g, gene_id, s.text)

                    dgtype = a.find('DisorderGeneAssociationType').get('id')
                    rel_id = self._map_rel_id(dgtype)
                    dg_label = \
                        a.find('./DisorderGeneAssociationType/Name').text
                    if rel_id is None:
                        logger.warning(
                            "Cannot map association type (%s) to RO " +
                            "for association (%s | %s).  Skipping.",
                            dg_label, disorder_label, gene_symbol)
                        continue

                    alt_locus_id = '_'+gene_num+'-'+disorder_num+'VL'
                    alt_label = \
                        ' '.join(('some variant of', gene_symbol.strip(),
                                  'that is a', dg_label.lower(),
                                  disorder_label))
                    if self.nobnodes:
                        alt_locus_id = ':'+alt_locus_id
                    gu.addIndividualToGraph(g, alt_locus_id, alt_label,
                                            geno.genoparts['variant_locus'])
                    geno.addAlleleOfGene(alt_locus_id, gene_id)

                    # consider typing the gain/loss-of-function variants like:
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002054
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002053

                    # use "assessed" status to issue an evidence code
                    # FIXME I think that these codes are sub-optimal
                    status_code = \
                        a.find('DisorderGeneAssociationStatus').get('id')
                    # imported automatically asserted information
                    # used in automatic assertion
                    eco_id = 'ECO:0000323'
                    # Assessed
                    # TODO are these internal ids stable between releases?
                    if status_code == '17991':
                        # imported manually asserted information
                        # used in automatic assertion
                        eco_id = 'ECO:0000322'
                    # Non-traceable author statement ECO_0000034
                    # imported information in automatic assertion ECO_0000313

                    assoc = G2PAssoc(self.name, alt_locus_id,
                                     disorder_id, rel_id)
                    assoc.add_evidence(eco_id)
                    assoc.add_association_to_graph(g)

                    rlist = a.find('./Gene/ExternalReferenceList')
                    eqid = None

                    for r in rlist.findall('ExternalReference'):
                        if r.find('Source').text == 'Ensembl':
                            eqid = 'ENSEMBL:'+r.find('Reference').text
                        elif r.find('Source').text == 'HGNC':
                            eqid = 'HGNC:'+r.find('Reference').text
                        elif r.find('Source').text == 'OMIM':
                            eqid = 'OMIM:'+r.find('Reference').text
                        else:
                            pass  # skip the others for now
                        if eqid is not None:
                            gu.addClassToGraph(g, eqid, None)
                            gu.addEquivalentClass(g, gene_id, eqid)
                elem.clear()  # discard the element

            if self.testMode and limit is not None and line_counter > limit:
                return

        gu.loadProperties(
            g, G2PAssoc.annotation_properties, G2PAssoc.ANNOTPROP)
        gu.loadProperties(g, G2PAssoc.datatype_properties, G2PAssoc.DATAPROP)
        gu.loadProperties(g, G2PAssoc.object_properties, G2PAssoc.OBJECTPROP)
        gu.loadAllProperties(g)

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:104,代码来源:Orphanet.py


注:本文中的dipper.utils.GraphUtils.GraphUtils.addEquivalentClass方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。