当前位置: 首页>>代码示例>>Python>>正文


Python Genotype.addAlleleOfGene方法代码示例

本文整理汇总了Python中dipper.models.Genotype.Genotype.addAlleleOfGene方法的典型用法代码示例。如果您正苦于以下问题:Python Genotype.addAlleleOfGene方法的具体用法?Python Genotype.addAlleleOfGene怎么用?Python Genotype.addAlleleOfGene使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dipper.models.Genotype.Genotype的用法示例。


在下文中一共展示了Genotype.addAlleleOfGene方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process_disease_association

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]
    def process_disease_association(self, limit):

        raw = '/'.join((self.rawdir, self.files['disease_assoc']['file']))

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        gu = GraphUtils(curie_map.get())

        logger.info("Processing disease models")
        geno = Genotype(g, self.nobnodes)
        line_counter = 0
        worm_taxon = 'NCBITaxon:6239'
        with open(raw, 'r') as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                if re.match(r'!', ''.join(row)):  # header
                    continue
                line_counter += 1
                (db, gene_num, gene_symbol, is_not, disease_id, ref,
                 eco_symbol, with_or_from, aspect, gene_name, gene_synonym,
                 gene_class, taxon, date, assigned_by, blank, blank2) = row

                if self.testMode and gene_num not in self.test_ids['gene']:
                    continue

                # TODO add NOT phenotypes
                if is_not == 'NOT':
                    continue

                # WB	WBGene00000001	aap-1		DOID:2583	PMID:19029536	IEA	ENSEMBL:ENSG00000145675|OMIM:615214	D		Y110A7A.10	gene	taxon:6239	20150612	WB
                gene_id = 'WormBase:'+gene_num

                # make a variant of the gene
                vl = '_'+'-'.join((gene_num, 'unspecified'))
                if self.nobnodes:
                    vl = ':'+vl
                vl_label = 'some variant of '+gene_symbol
                geno.addAlleleOfGene(vl, gene_id)
                animal_id = geno.make_experimental_model_with_genotype(
                    g, vl, vl_label, worm_taxon, 'worm')

                assoc = G2PAssoc(
                    self.name, animal_id,
                    disease_id, gu.object_properties['model_of'])
                ref = re.sub(r'WB_REF:', 'WormBase:', ref)
                if ref != '':
                    assoc.add_source(ref)
                eco_id = None
                if eco_symbol == 'IEA':
                    eco_id = 'ECO:0000501'  # IEA is this now
                if eco_id is not None:
                    assoc.add_evidence(eco_id)

                assoc.add_association_to_graph(g)

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:61,代码来源:WormBase.py

示例2: _get_process_allelic_variants

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]
    def _get_process_allelic_variants(self, entry, g):
        gu = GraphUtils(curie_map.get())
        geno = Genotype(g)
        du = DipperUtil()
        if entry is not None:
            publist = {}  # to hold the entry-specific publication mentions for the allelic variants
            entry_num = entry['mimNumber']

            # process the ref list just to get the pmids
            ref_to_pmid = self._get_pubs(entry, g)

            if 'allelicVariantList' in entry:
                allelicVariantList = entry['allelicVariantList']
                for al in allelicVariantList:
                    al_num = al['allelicVariant']['number']
                    al_id = 'OMIM:'+str(entry_num)+'.'+str(al_num).zfill(4)
                    al_label = None
                    al_description = None
                    if al['allelicVariant']['status'] == 'live':
                        publist[al_id] = set()
                        if 'mutations' in al['allelicVariant']:
                            al_label = al['allelicVariant']['mutations']
                        if 'text' in al['allelicVariant']:
                            al_description = al['allelicVariant']['text']
                            m = re.findall('\{(\d+)\:', al_description)
                            publist[al_id] = set(m)
                        geno.addAllele(al_id, al_label, geno.genoparts['variant_locus'], al_description)
                        geno.addAlleleOfGene(al_id, 'OMIM:'+str(entry_num),
                                             geno.object_properties['is_sequence_variant_instance_of'])
                        for r in publist[al_id]:
                            pmid = ref_to_pmid[int(r)]
                            gu.addTriple(g, pmid, gu.object_properties['is_about'], al_id)
                        # look up the pubmed id in the list of references
                        if 'dbSnps' in al['allelicVariant']:
                            dbsnp_ids = re.split(',', al['allelicVariant']['dbSnps'])
                            for dnum in dbsnp_ids:
                                did = 'dbSNP:'+dnum.strip()
                                gu.addIndividualToGraph(g, did, None)
                                gu.addEquivalentClass(g, al_id, did)
                        if 'clinvarAccessions' in al['allelicVariant']:
                            # clinvarAccessions triple semicolon delimited, each lik eRCV000020059;;1
                            rcv_ids = re.split(';;;', al['allelicVariant']['clinvarAccessions'])
                            rcv_ids = [(re.match('(RCV\d+)\;\;', r)).group(1) for r in rcv_ids]
                            for rnum in rcv_ids:
                                rid = 'ClinVar:'+rnum
                                gu.addXref(g, al_id, rid)
                        gu.addPage(g, al_id, "http://omim.org/entry/"+str(entry_num)+"#"+str(al_num).zfill(4))
                    elif re.search('moved', al['allelicVariant']['status']):
                        # for both 'moved' and 'removed'
                        moved_ids = None
                        if 'movedTo' in al['allelicVariant']:
                            moved_id = 'OMIM:'+al['allelicVariant']['movedTo']
                            moved_ids = [moved_id]
                        gu.addDeprecatedIndividual(g, al_id, moved_ids)
                    else:
                        logger.error('Uncaught alleleic variant status %s', al['allelicVariant']['status'])
                # end loop allelicVariantList

        return
开发者ID:d3borah,项目名称:dipper,代码行数:61,代码来源:OMIM.py

示例3: _get_variants

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
                    model.addSynonym(seqalt_id, hgvs_c)
                if hgvs_p != '-' and hgvs_p.strip() != '':
                    model.addSynonym(seqalt_id, hgvs_p)

                # add the dbsnp and dbvar ids as equivalent
                if dbsnp_num != '-' and int(dbsnp_num) != -1:
                    dbsnp_id = 'dbSNP:rs'+str(dbsnp_num)
                    model.addIndividualToGraph(dbsnp_id, None)
                    model.addSameIndividual(seqalt_id, dbsnp_id)
                if dbvar_num != '-':
                    dbvar_id = 'dbVar:'+dbvar_num
                    model.addIndividualToGraph(dbvar_id, None)
                    model.addSameIndividual(seqalt_id, dbvar_id)

                # TODO - not sure if this is right... add as xref?
                # the rcv is like the combo of the phenotype with the variant
                if rcv_nums != '-':
                    for rcv_num in re.split(r';', rcv_nums):
                        rcv_id = 'ClinVar:' + rcv_num
                        model.addIndividualToGraph(rcv_id, None)
                        model.addXref(seqalt_id, rcv_id)

                if gene_id is not None:
                    # add the gene
                    model.addClassToGraph(gene_id, gene_symbol)
                    # make a variant locus
                    vl_id = '_'+gene_num+'-'+variant_num
                    if self.nobnodes:
                        vl_id = ':'+vl_id
                    vl_label = allele_name
                    model.addIndividualToGraph(
                        vl_id, vl_label, geno.genoparts['variant_locus'])
                    geno.addSequenceAlterationToVariantLocus(seqalt_id, vl_id)
                    geno.addAlleleOfGene(vl_id, gene_id)
                else:
                    # some basic reporting
                    gmatch = re.search(r'\(\w+\)', allele_name)
                    if gmatch is not None and len(gmatch.groups()) > 0:
                        logger.info(
                            "Gene found in allele label, but no id provided: %s",
                            gmatch.group(1))
                    elif re.match(r'more than 10', gene_symbol):
                        logger.info(
                            "More than 10 genes found; "
                            "need to process XML to fetch (variant=%d)",
                            int(variant_num))
                    else:
                        logger.info(
                            "No gene listed for variant %d",
                            int(variant_num))

                # parse the list of "phenotypes" which are diseases.
                # add them as an association
                # ;GeneReviews:NBK1440,MedGen:C0392514,OMIM:235200,SNOMED CT:35400008;MedGen:C3280096,OMIM:614193;MedGen:CN034317,OMIM:612635;MedGen:CN169374
                # the list is both semicolon delimited and comma delimited,
                # but i don't know why! some are bad, like:
                # Orphanet:ORPHA ORPHA319705,SNOMED CT:49049000
                if phenotype_ids != '-':
                    for phenotype in pheno_list:
                        m = re.match(
                            r"(Orphanet:ORPHA(?:\s*ORPHA)?)", phenotype)
                        if m is not None and len(m.groups()) > 0:
                            phenotype = re.sub(
                                m.group(1), 'Orphanet:', phenotype.strip())
                        elif re.match(r'ORPHA:\d+', phenotype):
                            phenotype = re.sub(
开发者ID:DoctorBud,项目名称:dipper,代码行数:70,代码来源:ClinVar.py

示例4: _process_diseasegene

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]
    def _process_diseasegene(self, limit):
        """
        :param limit:
        :return:
        """
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        geno = Genotype(g)
        gu = GraphUtils(curie_map.get())

        myfile = "/".join((self.rawdir, self.files["disease-gene"]["file"]))

        for event, elem in ET.iterparse(myfile):
            if elem.tag == "Disorder":
                # get the element name and id
                # id = elem.get('id') # some internal identifier
                disorder_num = elem.find("OrphaNumber").text

                disorder_id = "Orphanet:" + str(disorder_num)

                if self.testMode and disorder_id not in config.get_config()["test_ids"]["disease"]:
                    continue

                disorder_label = elem.find("Name").text

                # make a hash of internal gene id to type for later lookup
                gene_iid_to_type = {}
                gene_list = elem.find("GeneList")
                for gene in gene_list.findall("Gene"):
                    gene_iid = gene.get("id")
                    gene_type = gene.find("GeneType").get("id")
                    gene_iid_to_type[gene_iid] = gene_type

                gu.addClassToGraph(g, disorder_id, disorder_label)  # assuming that these are in the ontology

                assoc_list = elem.find("DisorderGeneAssociationList")
                for a in assoc_list.findall("DisorderGeneAssociation"):
                    gene_iid = a.find(".//Gene").get("id")
                    gene_name = a.find(".//Gene/Name").text
                    gene_symbol = a.find(".//Gene/Symbol").text
                    gene_num = a.find("./Gene/OrphaNumber").text
                    gene_id = "Orphanet:" + str(gene_num)
                    gene_type_id = self._map_gene_type_id(gene_iid_to_type[gene_iid])
                    gu.addClassToGraph(g, gene_id, gene_symbol, gene_type_id, gene_name)
                    syn_list = a.find("./Gene/SynonymList")
                    if int(syn_list.get("count")) > 0:
                        for s in syn_list.findall("./Synonym"):
                            gu.addSynonym(g, gene_id, s.text)

                    dgtype = a.find("DisorderGeneAssociationType").get("id")
                    rel_id = self._map_rel_id(dgtype)
                    dg_label = a.find("./DisorderGeneAssociationType/Name").text
                    if rel_id is None:
                        logger.warn(
                            "Cannot map association type (%s) to RO for association (%s | %s).  Skipping.",
                            dg_label,
                            disorder_label,
                            gene_symbol,
                        )
                        continue

                    alt_locus_id = "_" + gene_num + "-" + disorder_num + "VL"
                    alt_label = " ".join(
                        ("some variant of", gene_symbol.strip(), "that is a", dg_label.lower(), disorder_label)
                    )
                    if self.nobnodes:
                        alt_locus_id = ":" + alt_locus_id
                    gu.addIndividualToGraph(g, alt_locus_id, alt_label, geno.genoparts["variant_locus"])
                    geno.addAlleleOfGene(alt_locus_id, gene_id)

                    # consider typing the gain/loss-of-function variants like:
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002054
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002053

                    # use "assessed" status to issue an evidence code
                    # FIXME I think that these codes are sub-optimal
                    status_code = a.find("DisorderGeneAssociationStatus").get("id")
                    eco_id = "ECO:0000323"  # imported automatically asserted information used in automatic assertion
                    if status_code == "17991":  # Assessed  # TODO are these internal ids stable between releases?
                        eco_id = "ECO:0000322"  # imported manually asserted information used in automatic assertion
                    # Non-traceable author statement ECO_0000034
                    # imported information in automatic assertion ECO_0000313

                    assoc = G2PAssoc(self.name, alt_locus_id, disorder_id, rel_id)
                    assoc.add_evidence(eco_id)
                    assoc.add_association_to_graph(g)

                    rlist = a.find("./Gene/ExternalReferenceList")
                    eqid = None

                    for r in rlist.findall("ExternalReference"):
                        if r.find("Source").text == "Ensembl":
                            eqid = "ENSEMBL:" + r.find("Reference").text
                        elif r.find("Source").text == "HGNC":
                            eqid = "HGNC:" + r.find("Reference").text
                        elif r.find("Source").text == "OMIM":
                            eqid = "OMIM:" + r.find("Reference").text
#.........这里部分代码省略.........
开发者ID:d3borah,项目名称:dipper,代码行数:103,代码来源:Orphanet.py

示例5: _process_phenotype_data

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
                    # assume the phenotype label is in the ontology
                    gu.addClassToGraph(g, pid, None)
                    if mgi_allele_id is not None and mgi_allele_id != '':
                        assoc = G2PAssoc(self.name, mgi_allele_id, pid,
                                         gu.object_properties['has_phenotype'])
                        for p in pubmed_ids:
                            assoc.add_source(p)
                        assoc.add_association_to_graph(g)
                    else:
                        logger.info("Phenotypes and no allele for %s",
                                    strain_id)

                if not self.testMode and (
                        limit is not None and line_counter > limit):
                    break

            # now that we've collected all of the variant information, build it
            # we don't know their zygosities
            for s in self.strain_hash:
                h = self.strain_hash.get(s)
                variants = h['variants']
                genes = h['genes']
                vl_set = set()
                # make variant loci for each gene
                if len(variants) > 0:
                    for v in variants:
                        vl_id = v
                        vl_symbol = self.id_label_hash[vl_id]
                        geno.addAllele(vl_id, vl_symbol,
                                       geno.genoparts['variant_locus'])
                        vl_set.add(vl_id)
                        if len(variants) == 1 and len(genes) == 1:
                            for gene in genes:
                                geno.addAlleleOfGene(vl_id, gene)
                        else:
                            geno.addAllele(vl_id, vl_symbol)
                else:  # len(vars) == 0
                    # it's just anonymous variants in some gene
                    for gene in genes:
                        vl_id = '_'+gene+'-VL'
                        vl_id = re.sub(r':', '', vl_id)
                        if self.nobnodes:
                            vl_id = ':'+vl_id
                        vl_symbol = self.id_label_hash[gene]+'<?>'
                        self.id_label_hash[vl_id] = vl_symbol
                        geno.addAllele(vl_id, vl_symbol,
                                       geno.genoparts['variant_locus'])
                        geno.addGene(gene, self.id_label_hash[gene])
                        geno.addAlleleOfGene(vl_id, gene)
                        vl_set.add(vl_id)

                # make the vslcs
                vl_list = sorted(vl_set)
                vslc_list = []
                for vl in vl_list:
                    # for unknown zygosity
                    vslc_id = '_'+re.sub(r'^_', '', vl)+'U'
                    vslc_id = re.sub(r':', '', vslc_id)
                    if self.nobnodes:
                        vslc_id = ':' + vslc_id
                    vslc_label = self.id_label_hash[vl] + '/?'
                    self.id_label_hash[vslc_id] = vslc_label
                    vslc_list.append(vslc_id)
                    geno.addPartsToVSLC(
                        vslc_id, vl, None, geno.zygosity['indeterminate'],
                        geno.object_properties['has_alternate_part'], None)
开发者ID:JervenBolleman,项目名称:dipper,代码行数:70,代码来源:MMRRC.py

示例6: Decipher

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
                            r = Reference(
                                pmid, Reference.ref_types['journal_article'])
                            r.addRefToGraph(g)
                            assoc.add_source(pmid)

                    assoc.add_association_to_graph(g)
                else:
                    # these are unmapped to a disease id.
                    # note that some match OMIM disease labels
                    # but the identifiers are just not included.
                    # TODO consider mapping to OMIM or DOIDs in other ways
                    logger.warning(
                        "No omim id on line %d\n%s", line_counter, str(row))
                    unmapped_omim_counter += 1

                # TODO hpo phenotypes
                # since the DDG2P file is not documented,
                # I don't know what the HPO annotations are actually about
                # are they about the gene?  the omim disease?  something else?
                # So, we wont create associations until this is clarified

                if not self.testMode and limit is not None \
                        and line_counter > limit:
                    break

        myzip.close()
        logger.warning(
            "gene-disorder associations with no omim id: %d",
            unmapped_omim_counter)
        logger.warning("unmapped gene count: %d", unmapped_gene_count)

        gu.loadProperties(g, G2PAssoc.object_properties, gu.OBJPROP)
        gu.loadProperties(g, G2PAssoc.datatype_properties, gu.DATAPROP)
        gu.loadProperties(g, G2PAssoc.annotation_properties, gu.ANNOTPROP)

        return

    def make_allele_by_consequence(self, consequence, gene_id, gene_symbol):
        """
        Given a "consequence" label that describes a variation type,
        create an anonymous variant of the specified gene as an instance of
        that consequence type.

        :param consequence:
        :param gene_id:
        :param gene_symbol:
        :return: allele_id
        """

        allele_id = None

        # Loss of function : Nonsense, frame-shifting indel,
        #   essential splice site mutation, whole gene deletion or any other
        #   mutation where functional analysis demonstrates clear reduction
        #   or loss of function
        # All missense/in frame : Where all the mutations described in the data
        #   source are either missense or in frame deletions and there is no
        #   evidence favoring either loss-of-function, activating or
        #   dominant negative effect
        # Dominant negative : Mutation within one allele of a gene that creates
        #   a significantly greater deleterious effect on gene product
        #   function than a monoallelic loss of function mutation
        # Activating : Mutation, usually missense that results in
        #   a constitutive functional activation of the gene product
        # Increased gene dosage : Copy number variation that increases
        #   the functional dosage of the gene
        # Cis-regulatory or promotor mutation : Mutation in cis-regulatory
        #   elements that lies outwith the known transcription unit and
        #   promotor of the controlled gene
        # Uncertain : Where the exact nature of the mutation is unclear or
        #   not recorded
        so_type = {                                 # type of variant
            'Loss of function': 'SO:0002054',       # loss of function
            'All missense/in frame': 'SO:0001583',  # missense
            'Dominant negative': 'SO:0002052',      # dominant negative
            'Activating': 'SO:0002053',             # gain of function
            'Increased gene dosage': 'SO:0001742',  # copy number gain
            # regulatory region
            'Cis-regulatory or promotor mutation': 'SO:0001566',
            'Uncertain': 'SO:0001060',              # generic sequence
            '5 or 3UTR mutation': 'SO:0001622',     # UTR
        }

        type_id = so_type.get(consequence)
        if type_id is None:
            logger.warning("Consequence type unmapped: %s", str(consequence))
            type_id = 'SO:0001060'  # sequence variant

        # make the allele
        allele_id = ''.join((gene_id, type_id))
        allele_id = re.sub(r':', '', allele_id)
        allele_id = '_'+allele_id  # make this a BNode
        if self.nobnodes:
            allele_id = ':'+allele_id
        allele_label = ' '.join((consequence, 'allele in', gene_symbol))

        self.gu.addIndividualToGraph(self.g, allele_id, allele_label, type_id)
        self.geno.addAlleleOfGene(allele_id, gene_id)

        return allele_id
开发者ID:JervenBolleman,项目名称:dipper,代码行数:104,代码来源:Decipher.py

示例7: _process_omim2gene

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]
    def _process_omim2gene(self, limit=None):
        """
        This method maps the OMIM IDs and KEGG gene ID. Currently split based on the link_type field.
        Equivalent link types are mapped as gene XRefs.
        Reverse link types are mapped as disease to gene associations.
        Original link types are currently skipped.

        Triples created:
        <kegg_gene_id> is a Gene
        <omim_gene_id> is a Gene
        <kegg_gene_id>> hasXref <omim_gene_id>

        <assoc_id> has subject <omim_disease_id>
        <assoc_id> has object <kegg_gene_id>
        :param limit:
        :return:
        """

        logger.info("Processing OMIM to KEGG gene")
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        geno = Genotype(g)
        gu = GraphUtils(curie_map.get())
        raw = '/'.join((self.rawdir, self.files['omim2gene']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                (kegg_gene_id, omim_id, link_type) = row

                if self.testMode and kegg_gene_id not in self.test_ids['genes']:
                    continue

                kegg_gene_id = 'KEGG-'+kegg_gene_id.strip()
                omim_id = re.sub('omim', 'OMIM', omim_id)
                if link_type == 'equivalent':
                    # these are genes!  so add them as a class then make equivalence
                    gu.addClassToGraph(g, omim_id, None)
                    geno.addGene(kegg_gene_id, None)
                    gu.addEquivalentClass(g, kegg_gene_id, omim_id)
                elif link_type == 'reverse':
                    # make an association between an OMIM ID and the KEGG gene ID
                    # we do this with omim ids because they are more atomic than KEGG ids

                    alt_locus_id = self._make_variant_locus_id(kegg_gene_id, omim_id)
                    alt_label = self.label_hash[alt_locus_id]
                    gu.addIndividualToGraph(g, alt_locus_id, alt_label, geno.genoparts['variant_locus'])
                    geno.addAlleleOfGene(alt_locus_id, kegg_gene_id)

                    # Add the disease to gene relationship.
                    rel = gu.object_properties['is_marker_for']
                    assoc = G2PAssoc(self.name, alt_locus_id, omim_id, rel)
                    assoc.add_association_to_graph(g)

                elif link_type == 'original':
                    # these are sometimes a gene, and sometimes a disease
                    logger.info('Unable to handle original link for %s-%s', kegg_gene_id, omim_id)
                else:
                    # don't know what these are
                    logger.warn('Unhandled link type for %s-%s: %s', kegg_gene_id, omim_id, link_type)

                if (not self.testMode) and (limit is not None and line_counter > limit):
                    break

        logger.info("Done with OMIM to KEGG gene")
        gu.loadProperties(g, G2PAssoc.annotation_properties, G2PAssoc.ANNOTPROP)
        gu.loadProperties(g, G2PAssoc.datatype_properties, G2PAssoc.DATAPROP)
        gu.loadProperties(g, G2PAssoc.object_properties, G2PAssoc.OBJECTPROP)

        return
开发者ID:d3borah,项目名称:dipper,代码行数:75,代码来源:KEGG.py

示例8: _process_kegg_disease2gene

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]
    def _process_kegg_disease2gene(self, limit=None):
        """
        This method creates an association between diseases and their associated genes.
        We are being conservative here, and only processing those diseases for which there
        is no mapping to OMIM.

        Triples created:
        <alternate_locus> is an Individual
        <alternate_locus> has type <variant_locus>
        <alternate_locus> is an allele of  <gene_id>

        <assoc_id> has subject <disease_id>
        <assoc_id> has object <gene_id>
        :param limit:
        :return:
        """

        logger.info("Processing KEGG disease to gene")
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        geno = Genotype(g)
        gu = GraphUtils(curie_map.get())
        rel = gu.object_properties['is_marker_for']
        gu.loadAllProperties(g)
        noomimset = set()
        raw = '/'.join((self.rawdir, self.files['disease_gene']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                (gene_id, disease_id) = row

                if self.testMode and gene_id not in self.test_ids['genes']:
                    continue

                gene_id = 'KEGG-'+gene_id.strip()
                disease_id = 'KEGG-'+disease_id.strip()

                # only add diseases for which there is no omim id and not a grouping class
                if disease_id not in self.kegg_disease_hash:
                    # add as a class
                    disease_label = None
                    if disease_id in self.label_hash:
                        disease_label = self.label_hash[disease_id]
                    if re.search('includ', str(disease_label)):
                        # they use 'including' when it's a grouping class
                        logger.info("Skipping this association because it's a grouping class: %s", disease_label)
                        continue
                    gu.addClassToGraph(g, disease_id, disease_label, 'DOID:4')  # type this disease_id as a disease
                    noomimset.add(disease_id)
                    alt_locus_id = self._make_variant_locus_id(gene_id, disease_id)
                    alt_label = self.label_hash[alt_locus_id]
                    gu.addIndividualToGraph(g, alt_locus_id, alt_label, geno.genoparts['variant_locus'])
                    geno.addAlleleOfGene(alt_locus_id, gene_id)
                    # Add the disease to gene relationship.
                    assoc = G2PAssoc(self.name, alt_locus_id, disease_id, rel)
                    assoc.load_all_properties(g)
                    assoc.add_association_to_graph(g)

                if (not self.testMode) and (limit is not None and line_counter > limit):
                    break

        logger.info("Done with KEGG disease to gene")
        logger.info("Found %d diseases with no omim id", len(noomimset))

        return
开发者ID:d3borah,项目名称:dipper,代码行数:71,代码来源:KEGG.py

示例9: OMIA

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
        lidaurl = row['lidaurl']

        if self.testMode and omia_id not in self.test_ids['disease']:
            return

        self.gu.addXref(self.g, omia_id, lidaurl, True)

        return

    def _process_phene_gene_row(self, row):

        gene_id = self.id_hash['gene'].get(row['gene_id'])
        phene_id = self.id_hash['phene'].get(row['phene_id'])

        omia_id = self._get_omia_id_from_phene_id(phene_id)

        if self.testMode and not (
                omia_id in self.test_ids['disease'] and
                row['gene_id'] in self.test_ids['gene']) or\
                gene_id is None or phene_id is None:
            return

        # occasionally some phenes are missing!  (ex: 406)
        if phene_id is None:
            logger.warning("Phene id %s is missing", str(row['phene_id']))
            return

        gene_label = self.label_hash[gene_id]
        # some variant of gene_id has phenotype d
        vl = '_'+re.sub(r'NCBIGene:', '', str(gene_id)) + 'VL'
        if self.nobnodes:
            vl = ':'+vl
        self.geno.addAllele(vl, 'some variant of ' + gene_label)
        self.geno.addAlleleOfGene(vl, gene_id)
        assoc = G2PAssoc(self.name, vl, phene_id)
        assoc.add_association_to_graph(self.g)

        # add the gene id to the set of annotated genes
        # for later lookup by orthology
        self.annotated_genes.add(gene_id)

        return

    def _process_omia_omim_map(self, row):
        """
        Links OMIA groups to OMIM equivalents.
        :param row:
        :return:
        """
        # omia_id, omim_id, added_by

        omia_id = 'OMIA:'+row['omia_id']
        omim_id = 'OMIM:'+row['omim_id']

        # also store this for use when we say that a given animal is
        # a model of a disease
        if omia_id not in self.omia_omim_map:
            self.omia_omim_map[omia_id] = set()
        self.omia_omim_map[omia_id].add(omim_id)

        if self.testMode and omia_id not in self.test_ids['disease']:
            return

        self.gu.addXref(self.g, omia_id, omim_id)

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:70,代码来源:OMIA.py

示例10: CTD

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
        preferred_disease_id = disease_id
        if omim_ids is not None and omim_ids != '':
            omim_id_list = re.split('\|', omim_ids)
            # If there is only one OMIM ID for the Disease ID or in the omim_ids list,
            # use the OMIM ID preferentially over any MeSH ID.
            if re.match('OMIM:.*', disease_id):
                if len(omim_id_list) > 1:
                    # the disease ID is an OMIM ID and there is more than one OMIM entry in omim_ids.
                    # Currently no entries satisfy this condition
                    pass
                elif disease_id != ('OMIM:'+omim_ids):
                    # the disease ID is an OMIM ID and there is only one non-equiv OMIM entry in omim_ids
                    # we preferentially use the disease_id here
                    logger.warn("There may be alternate identifier for %s: %s", disease_id, omim_ids)
                    # TODO: What should be done with the alternate disease IDs?
            else:
                if len(omim_id_list) == 1:
                    # the disease ID is not an OMIM ID and there is only one OMIM entry in omim_ids.
                    preferred_disease_id = 'OMIM:'+omim_ids
                elif len(omim_id_list) > 1:
                    # This is when the disease ID is not an OMIM ID and there is more than one OMIM entry in omim_ids.
                    pass

        # we actually want the association between the gene and the disease to be via an alternate locus
        # not the "wildtype" gene itself.
        # so we make an anonymous alternate locus, and put that in the association.
        alt_locus = '_'+gene_id+'-'+preferred_disease_id+'VL'
        alt_locus = re.sub(':', '', alt_locus)  # can't have colons in the bnodes
        if self.nobnodes:
            alt_locus = ':'+alt_locus
        alt_label = 'some variant of '+gene_symbol+' that is '+direct_evidence+' for '+disease_name
        self.gu.addIndividualToGraph(self.g, alt_locus, alt_label, self.geno.genoparts['variant_locus'])
        self.gu.addClassToGraph(self.g, gene_id, None)  # assume that the label gets added elsewhere
        self.geno.addAlleleOfGene(alt_locus, gene_id)

        # not sure if MESH is getting added separately.  adding labels here for good measure
        dlabel = None
        if re.match('MESH', preferred_disease_id):
            dlabel = disease_name
        self.gu.addClassToGraph(self.g, preferred_disease_id, dlabel)

        # Add the disease to gene relationship.
        rel_id = self._get_relationship_id(direct_evidence)
        refs = self._process_pubmed_ids(pubmed_ids)

        self._make_association(alt_locus, preferred_disease_id, rel_id, refs)

        return

    def _make_association(self, subject_id, object_id, rel_id, pubmed_ids):
        """
        Make a reified association given an array of pubmed identifiers.

        Args:
            :param subject_id  id of the subject of the association (gene/chem)
            :param object_id  id of the object of the association (disease)
            :param rel_id  relationship id
            :param pubmed_ids an array of pubmed identifiers
        Returns:
            :return None
        """

        # TODO pass in the relevant Assoc class rather than relying on G2P
        assoc = G2PAssoc(self.name, subject_id, object_id, rel_id)
        if pubmed_ids is not None and len(pubmed_ids) > 0:
            eco = self._get_evidence_code('TAS')
开发者ID:d3borah,项目名称:dipper,代码行数:70,代码来源:CTD.py

示例11: _process_phenotype_data

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
                    # assume the phenotype label is in some ontology
                    model.addClassToGraph(pid, None)
                    if mgi_allele_id is not None and mgi_allele_id != '':
                        assoc = G2PAssoc(
                            graph, self.name, mgi_allele_id, pid,
                            self.globaltt['has phenotype'])
                        for p in pubmed_ids:
                            assoc.add_source(p)
                        assoc.add_association_to_graph()
                    else:
                        LOG.info("Phenotypes and no allele for %s", strain_id)

                if not self.test_mode and (
                        limit is not None and reader.line_num > limit):
                    break

            # now that we've collected all of the variant information, build it
            # we don't know their zygosities
            for s in self.strain_hash:
                h = self.strain_hash.get(s)
                variants = h['variants']
                genes = h['genes']
                vl_set = set()
                # make variant loci for each gene
                if len(variants) > 0:
                    for var in variants:
                        vl_id = var.strip()
                        vl_symbol = self.id_label_hash[vl_id]
                        geno.addAllele(
                            vl_id, vl_symbol, self.globaltt['variant_locus'])
                        vl_set.add(vl_id)
                        if len(variants) == 1 and len(genes) == 1:
                            for gene in genes:
                                geno.addAlleleOfGene(vl_id, gene)
                        else:
                            geno.addAllele(vl_id, vl_symbol)
                else:  # len(vars) == 0
                    # it's just anonymous variants in some gene
                    for gene in genes:
                        vl_id = '_:' + re.sub(r':', '', gene) + '-VL'
                        vl_symbol = self.id_label_hash[gene]+'<?>'
                        self.id_label_hash[vl_id] = vl_symbol
                        geno.addAllele(
                            vl_id, vl_symbol, self.globaltt['variant_locus'])
                        geno.addGene(gene, self.id_label_hash[gene])
                        geno.addAlleleOfGene(vl_id, gene)
                        vl_set.add(vl_id)

                # make the vslcs
                vl_list = sorted(vl_set)
                vslc_list = []
                for vl in vl_list:
                    # for unknown zygosity
                    vslc_id = re.sub(r'^_', '', vl)+'U'
                    vslc_id = re.sub(r':', '', vslc_id)
                    vslc_id = '_:' + vslc_id
                    vslc_label = self.id_label_hash[vl] + '/?'
                    self.id_label_hash[vslc_id] = vslc_label
                    vslc_list.append(vslc_id)
                    geno.addPartsToVSLC(
                        vslc_id, vl, None, self.globaltt['indeterminate'],
                        self.globaltt['has_variant_part'], None)
                    model.addIndividualToGraph(
                        vslc_id, vslc_label,
                        self.globaltt['variant single locus complement'])
                if len(vslc_list) > 0:
开发者ID:TomConlin,项目名称:dipper,代码行数:70,代码来源:MMRRC.py

示例12: _process_QTLs_genetic_location

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]
    def _process_QTLs_genetic_location(self, raw, taxon_id, common_name, limit=None):
        """
        This function processes

        Triples created:

        :param limit:
        :return:
        """
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        geno = Genotype(g)
        gu = GraphUtils(curie_map.get())
        eco_id = "ECO:0000061"  # Quantitative Trait Analysis Evidence

        logger.info("Processing genetic location for %s", taxon_id)
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                (qtl_id, qtl_symbol, trait_name, assotype, empty, chromosome, position_cm, range_cm,
                 flankmark_a2, flankmark_a1, peak_mark, flankmark_b1, flankmark_b2, exp_id, model, test_base,
                 sig_level, lod_score, ls_mean, p_values, f_statistics, variance, bayes_value, likelihood_ratio,
                 trait_id, dom_effect, add_effect, pubmed_id, gene_id, gene_id_src, gene_id_type, empty2) = row

                if self.testMode and int(qtl_id) not in self.test_ids:
                    continue

                qtl_id = 'AQTL:'+qtl_id
                trait_id = 'AQTLTrait:'+trait_id

                # Add QTL to graph
                f = Feature(qtl_id, qtl_symbol, geno.genoparts['QTL'])
                f.addTaxonToFeature(g, taxon_id)

                # deal with the chromosome
                chrom_id = makeChromID(chromosome, taxon_id, 'CHR')

                # add a version of the chromosome which is defined as the genetic map
                build_id = 'MONARCH:'+common_name.strip()+'-linkage'
                build_label = common_name+' genetic map'
                geno.addReferenceGenome(build_id, build_label, taxon_id)
                chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
                geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
                start = stop = None
                if re.search('-', range_cm):
                    range_parts = re.split('-', range_cm)
                    # check for poorly formed ranges
                    if len(range_parts) == 2 and range_parts[0] != '' and range_parts[1] != '':
                        (start, stop) = [int(float(x.strip())) for x in re.split('-', range_cm)]
                    else:
                        logger.info("There's a cM range we can't handle for QTL %s: %s", qtl_id, range_cm)
                elif position_cm != '':
                    start = stop = int(float(position_cm))

                # FIXME remove converion to int for start/stop when schema can handle floats
                # add in the genetic location based on the range
                f.addFeatureStartLocation(start, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
                f.addFeatureEndLocation(stop, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
                f.addFeatureToGraph(g)

                # sometimes there's a peak marker, like a rsid.  we want to add that as a variant of the gene,
                # and xref it to the qtl.
                dbsnp_id = None
                if peak_mark != '' and peak_mark != '.' and re.match('rs', peak_mark.strip()):
                    dbsnp_id = 'dbSNP:'+peak_mark.strip()

                    gu.addIndividualToGraph(g, dbsnp_id, None, geno.genoparts['sequence_alteration'])
                    gu.addXref(g, qtl_id, dbsnp_id)

                if gene_id is not None and gene_id != '' and gene_id != '.':
                    if gene_id_src == 'NCBIgene' or gene_id_src == '':  # we assume if no src is provided, it's NCBI
                        gene_id = 'NCBIGene:'+gene_id.strip()
                        geno.addGene(gene_id, None)  # we will expect that these labels provided elsewhere
                        geno.addAlleleOfGene(qtl_id, gene_id, geno.object_properties['feature_to_gene_relation'])   # FIXME what is the right relationship here?

                        if dbsnp_id is not None:
                            # add the rsid as a seq alt of the gene_id
                            vl_id = '_' + re.sub(':', '', gene_id) + '-' + peak_mark
                            if self.nobnodes:
                                vl_id = ':' + vl_id
                            geno.addSequenceAlterationToVariantLocus(dbsnp_id, vl_id)
                            geno.addAlleleOfGene(vl_id, gene_id)

                # add the trait
                gu.addClassToGraph(g, trait_id, trait_name)

                # Add publication
                r = None
                if re.match('ISU.*', pubmed_id):
                    pub_id = 'AQTLPub:'+pubmed_id.strip()
                    r = Reference(pub_id)
                elif pubmed_id != '':
                    pub_id = 'PMID:'+pubmed_id.strip()
                    r = Reference(pub_id, Reference.ref_types['journal_article'])

                if r is not None:
#.........这里部分代码省略.........
开发者ID:d3borah,项目名称:dipper,代码行数:103,代码来源:AnimalQTLdb.py

示例13: process_catalog

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
                    # the variant from the context
                    for c in re.split(r';', context):
                        cid = self._map_variant_type(c.strip())
                        if cid is not None:
                            gu.addType(g, rs_id, cid)

                    # add deprecation information
                    if merged == 1 and str(snp_id_current.strip()) != '':
                        # get the current rs_id
                        current_rs_id = 'dbSNP:'
                        if not re.match(r'rs', snp_id_current):
                            current_rs_id += 'rs'
                        if loc is not None:
                            loc_to_id_hash[loc].append(current_rs_id)
                        current_rs_id += str(snp_id_current)
                        gu.addDeprecatedIndividual(g, rs_id, current_rs_id)
                        # TODO check on this
                        # should we add the annotations to the current
                        # or orig?
                        gu.makeLeader(g, current_rs_id)
                    else:
                        gu.makeLeader(g, rs_id)

                    # add the feature as a sequence alteration
                    # affecting various genes
                    # note that intronic variations don't necessarily list
                    # the genes such as for rs10448080  FIXME
                    if snp_gene_nums != '':
                        for s in re.split(r',', snp_gene_nums):
                            s = s.strip()
                            # still have to test for this,
                            # because sometimes there's a leading comma
                            if s != '':
                                gene_id = 'NCBIGene:'+s
                                geno.addAlleleOfGene(rs_id, gene_id)

                    # add the up and downstream genes if they are available
                    if upstream_gene_num != '':
                        downstream_gene_id = 'NCBIGene:'+downstream_gene_num
                        gu.addTriple(
                            g, rs_id,
                            Feature.object_properties[
                                r'upstream_of_sequence_of'],
                            downstream_gene_id)
                    if downstream_gene_num != '':
                        upstream_gene_id = 'NCBIGene:'+upstream_gene_num
                        gu.addTriple(
                            g, rs_id,
                            Feature.object_properties[
                                'downstream_of_sequence_of'],
                            upstream_gene_id)

                    description = 'A study of ' + disease_or_trait + \
                        ' in ' + initial_sample_description
                    if replicate_sample_description != '':
                        description = \
                            ' '.join(
                                (description, 'with',
                                 replicate_sample_description))
                    if platform_with_snps_passing_qc != '':
                        description = ' '.join(
                            (description, 'on platform',
                             platform_with_snps_passing_qc))
                    description = ' '.join((description, '(p='+pvalue+')'))

                    # make associations to the EFO terms; there can be >1
                    if mapped_trait_uri.strip() != '':
                        for t in re.split(r',', mapped_trait_uri):
                            t = t.strip()

                            cu = CurieUtil(curie_map.get())
                            tid = cu.get_curie(t)

                            assoc = G2PAssoc(
                                self.name, rs_id, tid,
                                gu.object_properties['contributes_to'])
                            assoc.add_source(pubmed_id)
                            # combinatorial evidence
                            # used in automatic assertion
                            eco_id = 'ECO:0000213'
                            assoc.add_evidence(eco_id)

                            # assoc.set_description(description)
                            # FIXME score should get added to provenance/study
                            # assoc.set_score(pvalue)
                            assoc.add_association_to_graph(g)

                    if not self.testMode and\
                            (limit is not None and line_counter > limit):
                        break

            Assoc(self.name).load_all_properties(g)

        # loop through the location hash,
        # and make all snps at that location equivalent
        for l in loc_to_id_hash:
            snp_ids = loc_to_id_hash[l]
            if len(snp_ids) > 1:
                logger.info("%s has >1 snp id: %s", l, str(snp_ids))
        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:104,代码来源:GWASCatalog.py

示例14: _process_diseasegene

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]
    def _process_diseasegene(self, limit):
        """
        :param limit:
        :return:
        """
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        geno = Genotype(g)
        gu = GraphUtils(curie_map.get())

        myfile = '/'.join((self.rawdir, self.files['disease-gene']['file']))

        # PYLINT complains iterparse deprecated,
        # but as of py 3.4 only the optional & unsupplied parse arg is.
        for event, elem in ET.iterparse(myfile):
            if elem.tag == 'Disorder':
                # get the element name and id, ignoreS element name
                # id = elem.get('id') # some internal identifier
                disorder_num = elem.find('OrphaNumber').text

                disorder_id = 'Orphanet:'+str(disorder_num)

                if self.testMode and \
                        disorder_id not in \
                        config.get_config()['test_ids']['disease']:
                    continue

                disorder_label = elem.find('Name').text

                # make a hash of internal gene id to type for later lookup
                gene_iid_to_type = {}
                gene_list = elem.find('GeneList')
                for gene in gene_list.findall('Gene'):
                    gene_iid = gene.get('id')
                    gene_type = gene.find('GeneType').get('id')
                    gene_iid_to_type[gene_iid] = gene_type

                # assuming that these are in the ontology
                gu.addClassToGraph(g, disorder_id, disorder_label)

                assoc_list = elem.find('DisorderGeneAssociationList')
                for a in assoc_list.findall('DisorderGeneAssociation'):
                    gene_iid = a.find('.//Gene').get('id')
                    gene_name = a.find('.//Gene/Name').text
                    gene_symbol = a.find('.//Gene/Symbol').text
                    gene_num = a.find('./Gene/OrphaNumber').text
                    gene_id = 'Orphanet:'+str(gene_num)
                    gene_type_id = \
                        self._map_gene_type_id(gene_iid_to_type[gene_iid])
                    gu.addClassToGraph(
                        g, gene_id, gene_symbol, gene_type_id, gene_name)
                    syn_list = a.find('./Gene/SynonymList')
                    if int(syn_list.get('count')) > 0:
                        for s in syn_list.findall('./Synonym'):
                            gu.addSynonym(g, gene_id, s.text)

                    dgtype = a.find('DisorderGeneAssociationType').get('id')
                    rel_id = self._map_rel_id(dgtype)
                    dg_label = \
                        a.find('./DisorderGeneAssociationType/Name').text
                    if rel_id is None:
                        logger.warning(
                            "Cannot map association type (%s) to RO " +
                            "for association (%s | %s).  Skipping.",
                            dg_label, disorder_label, gene_symbol)
                        continue

                    alt_locus_id = '_'+gene_num+'-'+disorder_num+'VL'
                    alt_label = \
                        ' '.join(('some variant of', gene_symbol.strip(),
                                  'that is a', dg_label.lower(),
                                  disorder_label))
                    if self.nobnodes:
                        alt_locus_id = ':'+alt_locus_id
                    gu.addIndividualToGraph(g, alt_locus_id, alt_label,
                                            geno.genoparts['variant_locus'])
                    geno.addAlleleOfGene(alt_locus_id, gene_id)

                    # consider typing the gain/loss-of-function variants like:
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002054
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002053

                    # use "assessed" status to issue an evidence code
                    # FIXME I think that these codes are sub-optimal
                    status_code = \
                        a.find('DisorderGeneAssociationStatus').get('id')
                    # imported automatically asserted information
                    # used in automatic assertion
                    eco_id = 'ECO:0000323'
                    # Assessed
                    # TODO are these internal ids stable between releases?
                    if status_code == '17991':
                        # imported manually asserted information
                        # used in automatic assertion
                        eco_id = 'ECO:0000322'
                    # Non-traceable author statement ECO_0000034
                    # imported information in automatic assertion ECO_0000313
#.........这里部分代码省略.........
开发者ID:JervenBolleman,项目名称:dipper,代码行数:103,代码来源:Orphanet.py

示例15: process_allele_phenotype

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAlleleOfGene [as 别名]

#.........这里部分代码省略.........
                 eco_symbol, with_or_from, aspect, gene_name, gene_synonym,
                 gene_class, taxon, date, assigned_by, blank, blank2) = row

                if self.testMode and gene_num not in self.test_ids['gene']:
                    continue

                # TODO add NOT phenotypes
                if is_not == 'NOT':
                    continue

                eco_id = None
                if eco_symbol == 'IMP':
                    eco_id = 'ECO:0000015'
                elif eco_symbol.strip() != '':
                    logger.warning(
                        "Encountered an ECO code we don't have: %s",
                        eco_symbol)

                # according to the GOA spec, persons are not allowed to be
                # in the reference column, therefore they the variant and
                # persons are swapped between the reference and with column.
                # we unswitch them here.
                temp_var = temp_ref = None
                if re.search(r'WBVar|WBRNAi', ref):
                    temp_var = ref
                    # move the paper from the with column into the ref
                if re.search(r'WBPerson', with_or_from):
                    temp_ref = with_or_from
                if temp_var is not None or temp_ref is not None:
                    with_or_from = temp_var
                    ref = temp_ref

                allele_list = re.split(r'\|', with_or_from)
                if len(allele_list) == 0:
                    logger.error(
                        "Missing alleles from phenotype assoc at line %d",
                        line_counter)
                    continue
                else:
                    for a in allele_list:
                        allele_num = re.sub(r'WB:', '', a.strip())
                        allele_id = 'WormBase:'+allele_num
                        gene_id = 'WormBase:'+gene_num

                        if re.search(r'WBRNAi', allele_id):
                            # make the reagent-targeted gene,
                            # & annotate that instead of the RNAi item directly
                            rnai_num = re.sub(r'WormBase:', '', allele_id)
                            rnai_id = allele_id
                            rtg_id = self.make_reagent_targeted_gene_id(
                                gene_num, rnai_num, self.nobnodes)
                            geno.addReagentTargetedGene(
                                rnai_id, 'WormBase:'+gene_num, rtg_id)
                            geno.addGeneTargetingReagent(
                                rnai_id, None, geno.genoparts['RNAi_reagent'],
                                gene_id)
                            allele_id = rtg_id
                        elif re.search(r'WBVar', allele_id):
                            # this may become deprecated by using wormmine
                            # make the allele to gene relationship
                            # the WBVars are really sequence alterations

                            # the public name will come from elsewhere
                            geno.addSequenceAlteration(allele_id, None)
                            vl_id = '_'+'-'.join((gene_num, allele_num))
                            if self.nobnodes:
                                vl_id = ':'+vl_id
                            geno.addSequenceAlterationToVariantLocus(
                                allele_id, vl_id)
                            geno.addAlleleOfGene(vl_id, gene_id)
                        else:
                            logger.warning(
                                "Some kind of allele I don't recognize: %s",
                                allele_num)
                            continue
                        assoc = G2PAssoc(self.name, allele_id, phenotype_id)

                        if eco_id is not None:
                            assoc.add_evidence(eco_id)

                        if ref is not None and ref != '':
                            ref = re.sub(r'(WB:|WB_REF:)', 'WormBase:', ref)
                            r = Reference(ref)
                            if re.search(r'Person', ref):
                                r.setType(r.ref_types['person'])
                                # also add
                                # inferred from background scientific knowledge
                                assoc.add_evidence('ECO:0000001')
                            r.addRefToGraph(g)
                            assoc.add_source(ref)

                        assoc.add_association_to_graph(g)

                        # finish looping through all alleles

                if not self.testMode \
                        and limit is not None and line_counter > limit:
                    break

        return
开发者ID:JervenBolleman,项目名称:dipper,代码行数:104,代码来源:WormBase.py


注:本文中的dipper.models.Genotype.Genotype.addAlleleOfGene方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。