当前位置: 首页>>代码示例>>Python>>正文


Python Genotype.addAffectedLocus方法代码示例

本文整理汇总了Python中dipper.models.Genotype.Genotype.addAffectedLocus方法的典型用法代码示例。如果您正苦于以下问题:Python Genotype.addAffectedLocus方法的具体用法?Python Genotype.addAffectedLocus怎么用?Python Genotype.addAffectedLocus使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dipper.models.Genotype.Genotype的用法示例。


在下文中一共展示了Genotype.addAffectedLocus方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _add_snp_gene_relation

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _add_snp_gene_relation(self, snp_id, snp_gene_nums,
                               upstream_gene_num, downstream_gene_num):
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        geno = Genotype(g)
        # add the feature as a sequence alteration
        # affecting various genes
        # note that intronic variations don't necessarily list
        # the genes such as for rs10448080  FIXME
        if snp_gene_nums != '':
            for s in re.split(r',', snp_gene_nums):
                s = s.strip()
                # still have to test for this,
                # because sometimes there's a leading comma
                if s != '':
                    gene_id = 'NCBIGene:' + s
                    geno.addAffectedLocus(snp_id, gene_id)

        # add the up and downstream genes if they are available
        if upstream_gene_num != '':
            downstream_gene_id = 'NCBIGene:' + downstream_gene_num
            g.addTriple(
                snp_id,
                Feature.object_properties[
                    r'upstream_of_sequence_of'],
                downstream_gene_id)
        if downstream_gene_num != '':
            upstream_gene_id = 'NCBIGene:' + upstream_gene_num
            g.addTriple(
                snp_id,
                Feature.object_properties[
                    'downstream_of_sequence_of'],
                upstream_gene_id)
开发者ID:kshefchek,项目名称:dipper,代码行数:37,代码来源:GWASCatalog.py

示例2: _add_snp_gene_relation

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _add_snp_gene_relation(
            self, snp_id, snp_gene_nums, upstream_gene_num, downstream_gene_num):
        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        geno = Genotype(graph)
        # add the feature as a sequence alteration
        # affecting various genes
        # note that intronic variations don't necessarily list
        # the genes such as for rs10448080  FIXME
        if snp_gene_nums != '':
            for geneid in re.split(r',', snp_gene_nums):
                geneid = geneid.strip()
                # still have to test for this,
                # because sometimes there's a leading comma
                if geneid != '':
                    geno.addAffectedLocus(snp_id, 'NCBIGene:' + geneid)

        # add the up and downstream genes if they are available
        if upstream_gene_num != '':
            downstream_gene_id = 'NCBIGene:' + downstream_gene_num
            graph.addTriple(
                snp_id, self.globaltt['is upstream of sequence of'], downstream_gene_id)
        if downstream_gene_num != '':
            upstream_gene_id = 'NCBIGene:' + upstream_gene_num
            graph.addTriple(
                snp_id, self.globaltt['is downstream of sequence of'], upstream_gene_id)
开发者ID:TomConlin,项目名称:dipper,代码行数:30,代码来源:GWASCatalog.py

示例3: process_disease_association

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def process_disease_association(self, limit):

        raw = '/'.join((self.rawdir, self.files['disease_assoc']['file']))

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        model = Model(g)
        logger.info("Processing disease models")
        geno = Genotype(g)
        line_counter = 0
        worm_taxon = 'NCBITaxon:6239'
        with open(raw, 'r') as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                if re.match(r'!', ''.join(row)):  # header
                    continue
                line_counter += 1
                (db, gene_num, gene_symbol, is_not, disease_id, ref,
                 eco_symbol, with_or_from, aspect, gene_name, gene_synonym,
                 gene_class, taxon, date, assigned_by, blank, blank2) = row

                if self.testMode and gene_num not in self.test_ids['gene']:
                    continue

                # TODO add NOT phenotypes
                if is_not == 'NOT':
                    continue

                # WB	WBGene00000001	aap-1		DOID:2583	PMID:19029536	IEA	ENSEMBL:ENSG00000145675|OMIM:615214	D		Y110A7A.10	gene	taxon:6239	20150612	WB
                gene_id = 'WormBase:'+gene_num

                # make a variant of the gene
                vl = '_:'+'-'.join((gene_num, 'unspecified'))
                vl_label = 'some variant of '+gene_symbol
                geno.addAffectedLocus(vl, gene_id)
                model.addBlankNodeAnnotation(vl)
                animal_id = geno.make_experimental_model_with_genotype(
                    vl, vl_label, worm_taxon, 'worm')

                assoc = G2PAssoc(
                    g, self.name, animal_id,
                    disease_id, model.object_properties['model_of'])
                ref = re.sub(r'WB_REF:', 'WormBase:', ref)
                if ref != '':
                    assoc.add_source(ref)
                eco_id = None
                if eco_symbol == 'IEA':
                    eco_id = 'ECO:0000501'  # IEA is this now
                if eco_id is not None:
                    assoc.add_evidence(eco_id)

                assoc.add_association_to_graph()

        return
开发者ID:kshefchek,项目名称:dipper,代码行数:59,代码来源:WormBase.py

示例4: _make_pheno_assoc

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _make_pheno_assoc(self, g, gene_id, gene_symbol, disorder_num,
                          disorder_label, phene_key):

        geno = Genotype(g)
        model = Model(g)
        disorder_id = ':'.join(('OMIM', disorder_num))
        rel_id = model.object_properties['has_phenotype']  # default
        rel_label = 'causes'
        if re.match(r'\[', disorder_label):
            rel_id = model.object_properties['is_marker_for']
            rel_label = 'is a marker for'
        elif re.match(r'\{', disorder_label):
            rel_id = model.object_properties['contributes_to']
            rel_label = 'contributes to'
        elif re.match(r'\?', disorder_label):
            # this is a questionable mapping!  skip?
            rel_id = model.object_properties['contributes_to']
            rel_label = 'contributes to'

        evidence = self._map_phene_mapping_code_to_eco(phene_key)

        # we actually want the association between the gene and the disease
        # to be via an alternate locus not the "wildtype" gene itself.
        # so we make an anonymous alternate locus,
        # and put that in the association.
        # but we only need to do that in the cases when it's not an NCBIGene
        # (as that is a sequence feature itself)
        if re.match(r'OMIM:', gene_id):
            alt_locus = '_:'+re.sub(r':', '', gene_id)+'-'+disorder_num+'VL'
            alt_label = gene_symbol.strip()
            if alt_label is not None and alt_label != '':
                alt_label = \
                    ' '.join(('some variant of', alt_label,
                              'that', rel_label, disorder_label))
            else:
                alt_label = None

            model.addIndividualToGraph(
                alt_locus, alt_label, Genotype.genoparts['variant_locus'])
            geno.addAffectedLocus(alt_locus, gene_id)
            model.addBlankNodeAnnotation(alt_locus)

        else:
            # assume it's already been added
            alt_locus = gene_id

        assoc = G2PAssoc(g, self.name, alt_locus, disorder_id, rel_id)
        assoc.add_evidence(evidence)
        assoc.add_association_to_graph()

        return
开发者ID:kshefchek,项目名称:dipper,代码行数:53,代码来源:OMIM.py

示例5: _process_diseasegene

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _process_diseasegene(self, limit):
        """
        :param limit:
        :return:
        """
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        geno = Genotype(g)
        model = Model(g)

        myfile = '/'.join((self.rawdir, self.files['disease-gene']['file']))

        # PYLINT complains iterparse deprecated,
        # but as of py 3.4 only the optional & unsupplied parse arg is.
        for event, elem in ET.iterparse(myfile):
            if elem.tag == 'Disorder':
                # get the element name and id, ignoreS element name
                # id = elem.get('id') # some internal identifier
                disorder_num = elem.find('OrphaNumber').text

                disorder_id = 'Orphanet:'+str(disorder_num)

                if self.testMode and \
                        disorder_id not in \
                        config.get_config()['test_ids']['disease']:
                    continue

                disorder_label = elem.find('Name').text

                # make a hash of internal gene id to type for later lookup
                gene_iid_to_type = {}
                gene_list = elem.find('GeneList')
                for gene in gene_list.findall('Gene'):
                    gene_iid = gene.get('id')
                    gene_type = gene.find('GeneType').get('id')
                    gene_iid_to_type[gene_iid] = gene_type

                # assuming that these are in the ontology
                model.addClassToGraph(disorder_id, disorder_label)

                assoc_list = elem.find('DisorderGeneAssociationList')
                for a in assoc_list.findall('DisorderGeneAssociation'):
                    gene_iid = a.find('.//Gene').get('id')
                    gene_name = a.find('.//Gene/Name').text
                    gene_symbol = a.find('.//Gene/Symbol').text
                    gene_num = a.find('./Gene/OrphaNumber').text
                    gene_id = 'Orphanet:'+str(gene_num)
                    gene_type_id = \
                        self._map_gene_type_id(gene_iid_to_type[gene_iid])
                    model.addClassToGraph(
                        gene_id, gene_symbol, gene_type_id, gene_name)
                    syn_list = a.find('./Gene/SynonymList')
                    if int(syn_list.get('count')) > 0:
                        for s in syn_list.findall('./Synonym'):
                            model.addSynonym(gene_id, s.text)

                    dgtype = a.find('DisorderGeneAssociationType').get('id')
                    rel_id = self._map_rel_id(dgtype)
                    dg_label = \
                        a.find('./DisorderGeneAssociationType/Name').text
                    if rel_id is None:
                        logger.warning(
                            "Cannot map association type (%s) to RO " +
                            "for association (%s | %s).  Skipping.",
                            dg_label, disorder_label, gene_symbol)
                        continue

                    alt_locus_id = '_:'+gene_num+'-'+disorder_num+'VL'
                    alt_label = \
                        ' '.join(('some variant of', gene_symbol.strip(),
                                  'that is a', dg_label.lower(),
                                  disorder_label))

                    model.addIndividualToGraph(alt_locus_id, alt_label,
                                               geno.genoparts['variant_locus'])
                    geno.addAffectedLocus(alt_locus_id, gene_id)
                    model.addBlankNodeAnnotation(alt_locus_id)

                    # consider typing the gain/loss-of-function variants like:
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002054
                    # http://sequenceontology.org/browser/current_svn/term/SO:0002053

                    # use "assessed" status to issue an evidence code
                    # FIXME I think that these codes are sub-optimal
                    status_code = \
                        a.find('DisorderGeneAssociationStatus').get('id')
                    # imported automatically asserted information
                    # used in automatic assertion
                    eco_id = 'ECO:0000323'
                    # Assessed
                    # TODO are these internal ids stable between releases?
                    if status_code == '17991':
                        # imported manually asserted information
                        # used in automatic assertion
                        eco_id = 'ECO:0000322'
                    # Non-traceable author statement ECO_0000034
                    # imported information in automatic assertion ECO_0000313
#.........这里部分代码省略.........
开发者ID:DoctorBud,项目名称:dipper,代码行数:103,代码来源:Orphanet.py

示例6: _build_gene_disease_model

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _build_gene_disease_model(
            self,
            gene_id,
            relation_id,
            disease_id,
            variant_label,
            consequence_predicate=None,
            consequence_id=None,
            allelic_requirement=None,
            pmids=None):
        """
        Builds gene variant disease model

        :return: None
        """
        model = Model(self.graph)
        geno = Genotype(self.graph)

        pmids = [] if pmids is None else pmids

        is_variant = False
        variant_or_gene = gene_id

        variant_id_string = variant_label
        variant_bnode = self.make_id(variant_id_string, "_")

        if consequence_predicate is not None \
                and consequence_id is not None:
            is_variant = True
            model.addTriple(variant_bnode,
                            consequence_predicate,
                            consequence_id)
            # Hack to add labels to terms that
            # don't exist in an ontology
            if consequence_id.startswith(':'):
                model.addLabel(consequence_id,
                               consequence_id.strip(':').replace('_', ' '))

        if is_variant:
            variant_or_gene = variant_bnode
            # Typically we would type the variant using the
            # molecular consequence, but these are not specific
            # enough for us to make mappings (see translation table)
            model.addIndividualToGraph(variant_bnode,
                                       variant_label,
                                       self.globaltt['variant_locus'])
            geno.addAffectedLocus(variant_bnode, gene_id)
            model.addBlankNodeAnnotation(variant_bnode)

        assoc = G2PAssoc(
            self.graph, self.name, variant_or_gene, disease_id, relation_id)
        assoc.source = pmids
        assoc.add_association_to_graph()

        if allelic_requirement is not None and is_variant is False:
            model.addTriple(
                assoc.assoc_id, self.globaltt['has_allelic_requirement'],
                allelic_requirement)
            if allelic_requirement.startswith(':'):
                model.addLabel(
                    allelic_requirement,
                    allelic_requirement.strip(':').replace('_', ' '))
开发者ID:TomConlin,项目名称:dipper,代码行数:64,代码来源:EBIGene2Phen.py

示例7: CTD

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]

#.........这里部分代码省略.........
                elif disease_id != ('OMIM:' + omim_ids):
                    # the disease ID is an OMIM ID and
                    # there is only one non-equiv OMIM entry in omim_ids
                    # we preferentially use the disease_id here
                    logger.warning(
                        "There may be alternate identifier for %s: %s",
                        disease_id, omim_ids)
                    # TODO: What should be done with the alternate disease IDs?
            else:
                if len(omim_id_list) == 1:
                    # the disease ID is not an OMIM ID
                    # and there is only one OMIM entry in omim_ids.
                    preferred_disease_id = 'OMIM:' + omim_ids
                elif len(omim_id_list) > 1:
                    # This is when the disease ID is not an OMIM ID and
                    # there is more than one OMIM entry in omim_ids.
                    pass

        # we actually want the association between the gene and the disease
        # to be via an alternate locus not the "wildtype" gene itself. So we
        # make an anonymous alternate locus, and put that in the association.
        alt_id = gene_id + '-' + preferred_disease_id + 'VL'
        # can't have colons in the bnodes
        alt_locus = re.sub(r':', '', alt_id)
        alt_locus = "_:" + alt_locus

        alt_label = 'some variant of ' + gene_symbol + ' that is ' \
                    + direct_evidence + ' for ' + disease_name
        model.addIndividualToGraph(
            alt_locus, alt_label,
            self.geno.genoparts['variant_locus'])
        # assume that the label gets added elsewhere
        model.addClassToGraph(gene_id, None)
        self.geno.addAffectedLocus(alt_locus, gene_id)
        model.addBlankNodeAnnotation(alt_locus)

        # not sure if MESH is getting added separately.
        # adding labels here for good measure
        dlabel = None
        if re.match(r'MESH', preferred_disease_id):
            dlabel = disease_name
        model.addClassToGraph(preferred_disease_id, dlabel)

        # Add the disease to gene relationship.
        rel_id = self._get_relationship_id(direct_evidence)
        refs = self._process_pubmed_ids(pubmed_ids)

        self._make_association(alt_locus, preferred_disease_id, rel_id, refs)

        return

    def _make_association(self, subject_id, object_id, rel_id, pubmed_ids):
        """
        Make a reified association given an array of pubmed identifiers.

        Args:
            :param subject_id  id of the subject of the association (gene/chem)
            :param object_id  id of the object of the association (disease)
            :param rel_id  relationship id
            :param pubmed_ids an array of pubmed identifiers
        Returns:
            :return None

        """

        # TODO pass in the relevant Assoc class rather than relying on G2P
开发者ID:kshefchek,项目名称:dipper,代码行数:70,代码来源:CTD.py

示例8: _process_qtls_genetic_location

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]

#.........这里部分代码省略.........

                    model.addIndividualToGraph(
                        dbsnp_id, None,
                        self.globaltt['sequence_alteration'])
                    model.addXref(qtl_id, dbsnp_id)

                gene_id = gene_id.replace('uncharacterized ', '').strip()
                if gene_id is not None and gene_id != '' and gene_id != '.'\
                        and re.fullmatch(r'[^ ]*', gene_id) is not None:

                    # we assume if no src is provided and gene_id is an integer,
                    # then it is an NCBI gene ... (okay, lets crank that back a notch)
                    if gene_id_src == '' and gene_id.isdigit() and \
                            gene_id in self.gene_info:
                        # LOG.info(
                        #    'Warm & Fuzzy saying %s is a NCBI gene for %s',
                        #    gene_id, common_name)
                        gene_id_src = 'NCBIgene'
                    elif gene_id_src == '' and gene_id.isdigit():
                        LOG.warning(
                            'Cold & Prickely saying %s is a NCBI gene for %s',
                            gene_id, common_name)
                        gene_id_src = 'NCBIgene'
                    elif gene_id_src == '':
                        LOG.error(
                            ' "%s" is a NOT NCBI gene for %s', gene_id, common_name)
                        gene_id_src = None

                    if gene_id_src == 'NCBIgene':
                        gene_id = 'NCBIGene:' + gene_id
                        # we will expect that these will get labels elsewhere
                        geno.addGene(gene_id, None)
                        # FIXME what is the right relationship here?
                        geno.addAffectedLocus(qtl_id, gene_id)

                        if dbsnp_id is not None:
                            # add the rsid as a seq alt of the gene_id
                            vl_id = '_:' + re.sub(
                                r':', '', gene_id) + '-' + peak_mark.strip()
                            geno.addSequenceAlterationToVariantLocus(
                                dbsnp_id, vl_id)
                            geno.addAffectedLocus(vl_id, gene_id)

                # add the trait
                model.addClassToGraph(trait_id, trait_name)

                # Add publication
                reference = None
                if re.match(r'ISU.*', pubmed_id):
                    pub_id = 'AQTLPub:'+pubmed_id.strip()
                    reference = Reference(graph, pub_id)
                elif pubmed_id != '':
                    pub_id = 'PMID:' + pubmed_id.strip()
                    reference = Reference(
                        graph, pub_id, self.globaltt['journal article'])

                if reference is not None:
                    reference.addRefToGraph()

                # make the association to the QTL
                assoc = G2PAssoc(
                    graph, self.name, qtl_id, trait_id, self.globaltt['is marker for'])
                assoc.add_evidence(eco_id)
                assoc.add_source(pub_id)

                # create a description from the contents of the file
开发者ID:TomConlin,项目名称:dipper,代码行数:70,代码来源:AnimalQTLdb.py

示例9: _process_haplotype

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _process_haplotype(
            self, hap_id, hap_label, chrom_num, chrom_pos, context,
            risk_allele_frequency, mapped_gene, so_ontology):

        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        geno = Genotype(graph)
        model = Model(graph)
        # add the feature to the graph
        hap_description = None
        if risk_allele_frequency != '' and risk_allele_frequency != 'NR':
            hap_description = str(risk_allele_frequency) + ' [risk allele frequency]'

        model.addIndividualToGraph(
            hap_id, hap_label.strip(), self.globaltt['haplotype'], hap_description)
        geno.addTaxon(self.globaltt["Homo sapiens"], hap_id)

        snp_labels = re.split(r';\s?', hap_label)
        chrom_nums = re.split(r';\s?', chrom_num)
        chrom_positions = re.split(r';\s?', chrom_pos)
        context_list = re.split(r';\s?', context)
        mapped_genes = re.split(r';\s?', mapped_gene)
        snp_curies = list()

        for index, snp in enumerate(snp_labels):
            snp_curie, snp_type = self._get_curie_and_type_from_id(snp)
            if snp_type is None:
                # make blank node
                snp_curie = self.make_id(snp, "_")

            graph.addTriple(hap_id, self.globaltt['has_variant_part'], snp_curie)
            snp_curies.append(snp_curie)

        # courtesy http://stackoverflow.com/a/16720915
        length = len(snp_labels)
        if not all(len(lst) == length
                   for lst in [chrom_nums, chrom_positions, context_list]):
            LOG.warning(
                "Unexpected data field for haplotype %s \n "
                "will not add snp details", hap_label)
            return

        variant_in_gene_count = 0
        for index, snp_curie in enumerate(snp_curies):
            self._add_snp_to_graph(
                snp_curie, snp_labels[index], chrom_nums[index],
                chrom_positions[index], context_list[index])

            if len(mapped_genes) == len(snp_labels):
                so_class = self.resolve(context_list[index])
                # removed the '+' for recursive  one-or-more rdfs:subClassOf  paths
                # just so it did not return an empty graph
                so_query = """
SELECT ?variant_label
    WHERE {{
        {0} rdfs:subClassOf {1} ;
        rdfs:label ?variant_label .
    }}
                """.format(so_class, self.globaltt['gene_variant'])

                query_result = so_ontology.query(so_query)

                if len(list(query_result)) == 1:
                    gene_id = DipperUtil.get_ncbi_id_from_symbol(mapped_genes[index])

                    if gene_id is not None:
                        geno.addAffectedLocus(snp_curie, gene_id)
                        geno.addAffectedLocus(hap_id, gene_id)
                        variant_in_gene_count += 1

                gene_id = DipperUtil.get_ncbi_id_from_symbol(mapped_genes[index])
                if gene_id is not None:
                    graph.addTriple(
                        snp_curie, self.resolve(context_list[index]), gene_id)

            else:
                LOG.warning(
                    "More mapped genes than snps, cannot disambiguate for %s",
                    hap_label)

        # Seperate in case we want to apply a different relation
        # If not this is redundant with triples added above
        if len(mapped_genes) == variant_in_gene_count and len(set(mapped_genes)) == 1:
            gene_id = DipperUtil.get_ncbi_id_from_symbol(mapped_genes[0])
            geno.addAffectedLocus(hap_id, gene_id)

        return
开发者ID:TomConlin,项目名称:dipper,代码行数:91,代码来源:GWASCatalog.py

示例10: _process_haplotype

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _process_haplotype(
            self, hap_id, hap_label, chrom_num, chrom_pos, context,
            risk_allele_frequency, mapped_gene, so_ontology):
        tax_id = 'NCBITaxon:9606'

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        geno = Genotype(g)
        model = Model(g)
        # add the feature to the graph
        hap_description = None
        if risk_allele_frequency != '' and \
                risk_allele_frequency != 'NR':
            hap_description = \
                str(risk_allele_frequency) + \
                ' [risk allele frequency]'

        model.addIndividualToGraph(hap_id, hap_label.strip(),
                                   Feature.types['haplotype'], hap_description)
        geno.addTaxon(tax_id, hap_id)

        snp_labels = re.split(r';\s?', hap_label)
        chrom_nums = re.split(r';\s?', chrom_num)
        chrom_positions = re.split(r';\s?', chrom_pos)
        context_list = re.split(r';\s?', context)
        mapped_genes = re.split(r';\s?', mapped_gene)
        snp_curies = list()

        for index, snp in enumerate(snp_labels):
            snp_curie, snp_type = self._get_curie_and_type_from_id(snp)
            if snp_type is None:
                # make blank node
                snp_curie = self.make_id(snp, "_")

            g.addTriple(hap_id, geno.object_properties['has_variant_part'],
                        snp_curie)
            snp_curies.append(snp_curie)

        # courtesy http://stackoverflow.com/a/16720915
        length = len(snp_labels)
        if not all(len(lst) == length
                   for lst in [chrom_nums, chrom_positions, context_list]):
            logger.warn(
                "Unexpected data field for haplotype {} \n "
                "will not add snp details".format(hap_label))
            return

        variant_in_gene_count = 0
        for index, snp_curie in enumerate(snp_curies):
            self._add_snp_to_graph(
                snp_curie, snp_labels[index], chrom_nums[index],
                chrom_positions[index], context_list[index])

            if len(mapped_genes) == len(snp_labels):

                so_class = self._map_variant_type(context_list[index])

                if so_class is None:
                    raise ValueError("Unknown SO class {} in haplotype {}"
                                     .format(context_list[index], hap_label))
                so_query = """
                    SELECT ?variant_label
                    WHERE {{
                        {0} rdfs:subClassOf+ SO:0001564 ;
                            rdfs:label ?variant_label .
                    }}
                """.format(so_class)

                query_result = so_ontology.query(so_query)
                if len(list(query_result)) > 0:
                    gene_id = DipperUtil.get_ncbi_id_from_symbol(
                        mapped_genes[index])
                    if gene_id is not None:
                        geno.addAffectedLocus(snp_curie, gene_id)
                        geno.addAffectedLocus(hap_id, gene_id)
                        variant_in_gene_count += 1

                if context_list[index] == 'upstream_gene_variant':
                    gene_id = DipperUtil.get_ncbi_id_from_symbol(
                        mapped_genes[index])
                    if gene_id is not None:
                        g.addTriple(
                            snp_curie,
                            Feature.object_properties[
                                'upstream_of_sequence_of'],
                            gene_id)
                elif context_list[index] == 'downstream_gene_variant':
                    gene_id = DipperUtil.get_ncbi_id_from_symbol(
                        mapped_genes[index])
                    if gene_id is not None:
                        g.addTriple(
                            snp_curie,
                            Feature.object_properties[
                                'downstream_of_sequence_of'],
                            gene_id)
            else:
                logger.warn("More mapped genes than snps, "
                            "cannot disambiguate for {}".format(hap_label))
#.........这里部分代码省略.........
开发者ID:kshefchek,项目名称:dipper,代码行数:103,代码来源:GWASCatalog.py

示例11: _process_omim2gene

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _process_omim2gene(self, limit=None):
        """
        This method maps the OMIM IDs and KEGG gene ID.
        Currently split based on the link_type field.
        Equivalent link types are mapped as gene XRefs.
        Reverse link types are mapped as disease to gene associations.
        Original link types are currently skipped.

        Triples created:
        <kegg_gene_id> is a Gene
        <omim_gene_id> is a Gene
        <kegg_gene_id>> hasXref <omim_gene_id>

        <assoc_id> has subject <omim_disease_id>
        <assoc_id> has object <kegg_gene_id>
        :param limit:

        :return:
        """

        LOG.info("Processing OMIM to KEGG gene")
        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        model = Model(graph)
        geno = Genotype(graph)
        raw = '/'.join((self.rawdir, self.files['omim2gene']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in reader:
                (kegg_gene_id, omim_id, link_type) = row

                if self.test_mode and kegg_gene_id not in self.test_ids['genes']:
                    continue

                kegg_gene_id = 'KEGG-' + kegg_gene_id.strip()
                omim_id = re.sub(r'omim', 'OMIM', omim_id)
                if link_type == 'equivalent':
                    # these are genes!
                    # so add them as a class then make equivalence
                    model.addClassToGraph(omim_id, None)
                    geno.addGene(kegg_gene_id, None)

                    # previous: if omim type is not disease-ish then use
                    # now is:   if omim type is gene then use

                    if omim_id in self.omim_replaced:
                        repl = self.omim_replaced[omim_id]
                        for omim in repl:
                            if omim in self.omim_type and \
                                    self.omim_type[omim] == self.globaltt['gene']:
                                omim_id = omim
                    if omim_id in self.omim_type and \
                            self.omim_type[omim_id] == self.globaltt['gene']:
                        model.addEquivalentClass(kegg_gene_id, omim_id)
                elif link_type == 'reverse':
                    # make an association between an OMIM ID & the KEGG gene ID
                    # we do this with omim ids because
                    # they are more atomic than KEGG ids

                    alt_locus_id = self._make_variant_locus_id(kegg_gene_id, omim_id)
                    alt_label = self.label_hash[alt_locus_id]
                    model.addIndividualToGraph(
                        alt_locus_id, alt_label, self.globaltt['variant_locus'])
                    geno.addAffectedLocus(alt_locus_id, kegg_gene_id)
                    model.addBlankNodeAnnotation(alt_locus_id)

                    # Add the disease to gene relationship.
                    rel = self.globaltt['is marker for']
                    assoc = G2PAssoc(graph, self.name, alt_locus_id, omim_id, rel)
                    assoc.add_association_to_graph()

                elif link_type == 'original':
                    # these are sometimes a gene, and sometimes a disease
                    LOG.info(
                        'Unable to handle original link for %s-%s',
                        kegg_gene_id, omim_id)
                else:
                    # don't know what these are
                    LOG.warning(
                        'Unhandled link type for %s-%s: %s',
                        kegg_gene_id, omim_id, link_type)

                if (not self.test_mode) and (
                        limit is not None and reader.line_num > limit):
                    break
        LOG.info("Done with OMIM to KEGG gene")
开发者ID:TomConlin,项目名称:dipper,代码行数:90,代码来源:KEGG.py

示例12: _process_kegg_disease2gene

# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addAffectedLocus [as 别名]
    def _process_kegg_disease2gene(self, limit=None):
        """
        This method creates an association between diseases and
        their associated genes. We are being conservative here, and only
        processing those diseases for which there is no mapping to OMIM.

        Triples created:
        <alternate_locus> is an Individual
        <alternate_locus> has type <variant_locus>
        <alternate_locus> is an allele of  <gene_id>

        <assoc_id> has subject <disease_id>
        <assoc_id> has object <gene_id>
        :param limit:
        :return:

        """

        LOG.info("Processing KEGG disease to gene")
        if self.test_mode:
            graph = self.testgraph
        else:
            graph = self.graph
        model = Model(graph)
        geno = Genotype(graph)
        rel = self.globaltt['is marker for']
        noomimset = set()
        raw = '/'.join((self.rawdir, self.files['disease_gene']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in reader:
                (gene_id, disease_id) = row

                if self.test_mode and gene_id not in self.test_ids['genes']:
                    continue

                gene_id = 'KEGG-' + gene_id.strip()
                disease_id = 'KEGG-' + disease_id.strip()

                # only add diseases for which
                # there is no omim id and not a grouping class
                if disease_id not in self.kegg_disease_hash:
                    # add as a class
                    disease_label = None
                    if disease_id in self.label_hash:
                        disease_label = self.label_hash[disease_id]
                    if re.search(r'includ', str(disease_label)):
                        # they use 'including' when it's a grouping class
                        LOG.info(
                            "Skipping association because it's a grouping class: %s",
                            disease_label)
                        continue
                    # type this disease_id as a disease
                    model.addClassToGraph(disease_id, disease_label)
                    # , class_type=self.globaltt['disease'])
                    noomimset.add(disease_id)
                    alt_locus_id = self._make_variant_locus_id(gene_id, disease_id)
                    alt_label = self.label_hash[alt_locus_id]
                    model.addIndividualToGraph(
                        alt_locus_id, alt_label, self.globaltt['variant_locus'])
                    geno.addAffectedLocus(alt_locus_id, gene_id)
                    model.addBlankNodeAnnotation(alt_locus_id)
                    # Add the disease to gene relationship.
                    assoc = G2PAssoc(graph, self.name, alt_locus_id, disease_id, rel)
                    assoc.add_association_to_graph()

                if not self.test_mode and (
                        limit is not None and reader.line_num > limit):
                    break

        LOG.info("Done with KEGG disease to gene")
        LOG.info("Found %d diseases with no omim id", len(noomimset))
开发者ID:TomConlin,项目名称:dipper,代码行数:74,代码来源:KEGG.py


注:本文中的dipper.models.Genotype.Genotype.addAffectedLocus方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。