本文整理汇总了Python中dipper.models.Model.Model.addBlankNodeAnnotation方法的典型用法代码示例。如果您正苦于以下问题:Python Model.addBlankNodeAnnotation方法的具体用法?Python Model.addBlankNodeAnnotation怎么用?Python Model.addBlankNodeAnnotation使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Model.Model的用法示例。
在下文中一共展示了Model.addBlankNodeAnnotation方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_disease_association
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addBlankNodeAnnotation [as 别名]
def process_disease_association(self, limit):
    """
    Read the tab-separated disease-association file and add one
    gene-to-disease association per usable row.

    Rows whose joined text starts with ``!`` are treated as header lines,
    and rows flagged ``NOT`` are skipped.  For every remaining row a blank
    node standing for "some variant of <gene_symbol>" is created, attached
    to the gene with ``addAffectedLocus``, annotated as a blank node, and
    handed to ``make_experimental_model_with_genotype``; the id returned
    by that call becomes the subject of a ``model_of`` association whose
    object is the row's disease id.

    :param limit: maximum number of data (non-header) rows to process
        when not running in test mode; ``None`` means no limit
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['disease_assoc']['file']))
    g = self.testgraph if self.testMode else self.graph
    model = Model(g)
    logger.info("Processing disease models")
    geno = Genotype(g)
    line_counter = 0
    worm_taxon = 'NCBITaxon:6239'
    with open(raw, 'r') as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            if re.match(r'!', ''.join(row)):  # header
                continue
            line_counter += 1

            # Honour the caller-supplied row limit.  Checking right after
            # the increment means the `continue` statements below cannot
            # bypass it.  Test mode always reads the whole file.
            if not self.testMode and limit is not None \
                    and line_counter > limit:
                break

            # Unpacking doubles as a column-count check: a row that does
            # not have exactly 17 fields raises ValueError.
            (db, gene_num, gene_symbol, is_not, disease_id, ref,
             eco_symbol, with_or_from, aspect, gene_name, gene_synonym,
             gene_class, taxon, date, assigned_by, blank, blank2) = row

            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            # TODO add NOT phenotypes
            if is_not == 'NOT':
                continue

            # Example row:
            # WB WBGene00000001 aap-1 DOID:2583 PMID:19029536 IEA
            # ENSEMBL:ENSG00000145675|OMIM:615214 D Y110A7A.10 gene
            # taxon:6239 20150612 WB
            gene_id = 'WormBase:' + gene_num

            # make a variant of the gene
            vl = '_:' + '-'.join((gene_num, 'unspecified'))
            vl_label = 'some variant of ' + gene_symbol
            geno.addAffectedLocus(vl, gene_id)
            model.addBlankNodeAnnotation(vl)
            animal_id = geno.make_experimental_model_with_genotype(
                vl, vl_label, worm_taxon, 'worm')

            assoc = G2PAssoc(
                g, self.name, animal_id,
                disease_id, model.object_properties['model_of'])

            # Rewrite the WB_REF: prefix to the WormBase: prefix.
            ref = re.sub(r'WB_REF:', 'WormBase:', ref)
            if ref != '':
                assoc.add_source(ref)

            # Only the IEA evidence symbol is mapped to an ECO id; any
            # other symbol leaves the association without evidence.
            eco_id = None
            if eco_symbol == 'IEA':
                eco_id = 'ECO:0000501'  # IEA is this now
            if eco_id is not None:
                assoc.add_evidence(eco_id)

            assoc.add_association_to_graph()
    return
示例2: _make_pheno_assoc
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addBlankNodeAnnotation [as 别名]
def _make_pheno_assoc(self, g, gene_id, gene_symbol, disorder_num,
                      disorder_label, phene_key):
    """
    Add a gene-to-disorder association to graph ``g``.

    The relation is chosen from the first character of ``disorder_label``:
    ``[`` selects ``is_marker_for``, ``{`` and ``?`` select
    ``contributes_to``, and anything else keeps the default
    ``has_phenotype``.

    When ``gene_id`` starts with ``OMIM:`` the association subject is an
    anonymous variant locus (a blank node) of that gene rather than the
    gene itself; otherwise ``gene_id`` is used directly as the subject.

    :param g: graph the triples are written to
    :param gene_id: curie of the gene
    :param gene_symbol: gene symbol used to label the variant locus; may
        be ``None`` or blank, in which case the locus gets no label
    :param disorder_num: OMIM number of the disorder (without prefix)
    :param disorder_label: disorder label, including any leading
        ``[``, ``{`` or ``?`` marker
    :param phene_key: key passed to ``_map_phene_mapping_code_to_eco``
        to obtain the evidence id
    :return: None
    """
    geno = Genotype(g)
    model = Model(g)
    disorder_id = ':'.join(('OMIM', disorder_num))

    rel_id = model.object_properties['has_phenotype']  # default
    rel_label = 'causes'
    if re.match(r'\[', disorder_label):
        rel_id = model.object_properties['is_marker_for']
        rel_label = 'is a marker for'
    elif re.match(r'\{', disorder_label):
        rel_id = model.object_properties['contributes_to']
        rel_label = 'contributes to'
    elif re.match(r'\?', disorder_label):
        # this is a questionable mapping!  skip?
        rel_id = model.object_properties['contributes_to']
        rel_label = 'contributes to'

    evidence = self._map_phene_mapping_code_to_eco(phene_key)

    # we actually want the association between the gene and the disease
    # to be via an alternate locus not the "wildtype" gene itself.
    # so we make an anonymous alternate locus,
    # and put that in the association.
    # but we only need to do that in the cases when it's not an NCBIGene
    # (as that is a sequence feature itself)
    if re.match(r'OMIM:', gene_id):
        alt_locus = '_:' + re.sub(r':', '', gene_id) + '-' \
            + disorder_num + 'VL'

        # Guard before stripping: calling .strip() on a missing symbol
        # would raise AttributeError and the None fallback below would
        # never be reached.
        alt_label = None if gene_symbol is None else gene_symbol.strip()
        if alt_label is not None and alt_label != '':
            alt_label = ' '.join(
                ('some variant of', alt_label,
                 'that', rel_label, disorder_label))
        else:
            alt_label = None

        model.addIndividualToGraph(
            alt_locus, alt_label, Genotype.genoparts['variant_locus'])
        geno.addAffectedLocus(alt_locus, gene_id)
        model.addBlankNodeAnnotation(alt_locus)
    else:
        # assume it's already been added
        alt_locus = gene_id

    assoc = G2PAssoc(g, self.name, alt_locus, disorder_id, rel_id)
    assoc.add_evidence(evidence)
    assoc.add_association_to_graph()
    return
示例3: _process_diseasegene
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addBlankNodeAnnotation [as 别名]
def _process_diseasegene(self, limit):
"""
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
line_counter = 0
geno = Genotype(g)
model = Model(g)
myfile = '/'.join((self.rawdir, self.files['disease-gene']['file']))
# PYLINT complains iterparse deprecated,
# but as of py 3.4 only the optional & unsupplied parse arg is.
for event, elem in ET.iterparse(myfile):
if elem.tag == 'Disorder':
# get the element name and id, ignoreS element name
# id = elem.get('id') # some internal identifier
disorder_num = elem.find('OrphaNumber').text
disorder_id = 'Orphanet:'+str(disorder_num)
if self.testMode and \
disorder_id not in \
config.get_config()['test_ids']['disease']:
continue
disorder_label = elem.find('Name').text
# make a hash of internal gene id to type for later lookup
gene_iid_to_type = {}
gene_list = elem.find('GeneList')
for gene in gene_list.findall('Gene'):
gene_iid = gene.get('id')
gene_type = gene.find('GeneType').get('id')
gene_iid_to_type[gene_iid] = gene_type
# assuming that these are in the ontology
model.addClassToGraph(disorder_id, disorder_label)
assoc_list = elem.find('DisorderGeneAssociationList')
for a in assoc_list.findall('DisorderGeneAssociation'):
gene_iid = a.find('.//Gene').get('id')
gene_name = a.find('.//Gene/Name').text
gene_symbol = a.find('.//Gene/Symbol').text
gene_num = a.find('./Gene/OrphaNumber').text
gene_id = 'Orphanet:'+str(gene_num)
gene_type_id = \
self._map_gene_type_id(gene_iid_to_type[gene_iid])
model.addClassToGraph(
gene_id, gene_symbol, gene_type_id, gene_name)
syn_list = a.find('./Gene/SynonymList')
if int(syn_list.get('count')) > 0:
for s in syn_list.findall('./Synonym'):
model.addSynonym(gene_id, s.text)
dgtype = a.find('DisorderGeneAssociationType').get('id')
rel_id = self._map_rel_id(dgtype)
dg_label = \
a.find('./DisorderGeneAssociationType/Name').text
if rel_id is None:
logger.warning(
"Cannot map association type (%s) to RO " +
"for association (%s | %s). Skipping.",
dg_label, disorder_label, gene_symbol)
continue
alt_locus_id = '_:'+gene_num+'-'+disorder_num+'VL'
alt_label = \
' '.join(('some variant of', gene_symbol.strip(),
'that is a', dg_label.lower(),
disorder_label))
model.addIndividualToGraph(alt_locus_id, alt_label,
geno.genoparts['variant_locus'])
geno.addAffectedLocus(alt_locus_id, gene_id)
model.addBlankNodeAnnotation(alt_locus_id)
# consider typing the gain/loss-of-function variants like:
# http://sequenceontology.org/browser/current_svn/term/SO:0002054
# http://sequenceontology.org/browser/current_svn/term/SO:0002053
# use "assessed" status to issue an evidence code
# FIXME I think that these codes are sub-optimal
status_code = \
a.find('DisorderGeneAssociationStatus').get('id')
# imported automatically asserted information
# used in automatic assertion
eco_id = 'ECO:0000323'
# Assessed
# TODO are these internal ids stable between releases?
if status_code == '17991':
# imported manually asserted information
# used in automatic assertion
eco_id = 'ECO:0000322'
# Non-traceable author statement ECO_0000034
# imported information in automatic assertion ECO_0000313
#.........这里部分代码省略.........
示例4: _build_gene_disease_model
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addBlankNodeAnnotation [as 别名]
def _build_gene_disease_model(
        self,
        gene_id,
        relation_id,
        disease_id,
        variant_label,
        consequence_predicate=None,
        consequence_id=None,
        allelic_requirement=None,
        pmids=None):
    """
    Add a gene (or gene-variant) to disease association to ``self.graph``.

    If both ``consequence_predicate`` and ``consequence_id`` are given,
    a blank-node variant locus of ``gene_id`` is created and used as the
    association subject; otherwise the gene itself is the subject.  An
    allelic requirement, when supplied, is attached to the association
    only in the gene-level (non-variant) case.

    :param gene_id: curie of the gene
    :param relation_id: predicate linking subject and disease
    :param disease_id: curie of the disease
    :param variant_label: label for the variant; also the input from
        which ``self.make_id`` derives the blank-node id
    :param consequence_predicate: predicate linking the variant to its
        consequence, or None
    :param consequence_id: id of the consequence term, or None
    :param allelic_requirement: id of the allelic requirement, or None
    :param pmids: publication ids stored on the association; defaults to
        an empty list
    :return: None
    """
    graph = self.graph
    model = Model(graph)
    geno = Genotype(graph)

    if pmids is None:
        pmids = []

    variant_bnode = self.make_id(variant_label, "_")
    is_variant = (consequence_predicate is not None
                  and consequence_id is not None)

    subject_id = gene_id
    if is_variant:
        model.addTriple(
            variant_bnode, consequence_predicate, consequence_id)
        # Hack: terms that don't exist in an ontology (ids starting
        # with ':') get a label derived from the id itself.
        if consequence_id.startswith(':'):
            consequence_label = \
                consequence_id.strip(':').replace('_', ' ')
            model.addLabel(consequence_id, consequence_label)

        subject_id = variant_bnode
        # Typically we would type the variant using the molecular
        # consequence, but these are not specific enough for us to
        # make mappings (see translation table)
        model.addIndividualToGraph(
            variant_bnode, variant_label, self.globaltt['variant_locus'])
        geno.addAffectedLocus(variant_bnode, gene_id)
        model.addBlankNodeAnnotation(variant_bnode)

    assoc = G2PAssoc(
        graph, self.name, subject_id, disease_id, relation_id)
    assoc.source = pmids
    assoc.add_association_to_graph()

    # The allelic requirement is only recorded for gene-level
    # associations.
    if allelic_requirement is None or is_variant:
        return

    model.addTriple(
        assoc.assoc_id, self.globaltt['has_allelic_requirement'],
        allelic_requirement)
    if allelic_requirement.startswith(':'):
        requirement_label = \
            allelic_requirement.strip(':').replace('_', ' ')
        model.addLabel(allelic_requirement, requirement_label)
示例5: _process_disease2gene
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addBlankNodeAnnotation [as 别名]
#.........这里部分代码省略.........
model = Model(self.g)
(gene_symbol, gene_id, disease_name, disease_id, direct_evidence,
inference_chemical_name, inference_score, omim_ids, pubmed_ids) = row
# we only want the direct associations; skipping inferred for now
if direct_evidence == '' or direct_evidence != 'marker/mechanism':
return
# scrub some of the associations...
# it seems odd to link human genes to the following "diseases"
diseases_to_scrub = [
'MESH:D004283', # dog diseases
'MESH:D004195', # disease models, animal
'MESH:D030342', # genetic diseases, inborn
'MESH:D040181', # genetic dieases, x-linked
'MESH:D020022'] # genetic predisposition to a disease
if disease_id in diseases_to_scrub:
logger.info(
"Skipping association between NCBIGene:%s and %s",
str(gene_id), disease_id)
return
intersect = list(
set(['OMIM:' + str(i) for i in omim_ids.split('|')] +
[disease_id]) & set(self.test_diseaseids))
if self.testMode and (
int(gene_id) not in self.test_geneids or len(intersect) < 1):
return
# there are three kinds of direct evidence:
# (marker/mechanism | marker/mechanism|therapeutic | therapeutic)
# we are only using the "marker/mechanism" for now
# TODO what does it mean for a gene to be therapeutic for disease?
# a therapeutic target?
gene_id = 'NCBIGene:' + gene_id
preferred_disease_id = disease_id
if omim_ids is not None and omim_ids != '':
omim_id_list = re.split(r'\|', omim_ids)
# If there is only one OMIM ID for the Disease ID
# or in the omim_ids list,
# use the OMIM ID preferentially over any MeSH ID.
if re.match(r'OMIM:.*', disease_id):
if len(omim_id_list) > 1:
# the disease ID is an OMIM ID and
# there is more than one OMIM entry in omim_ids.
# Currently no entries satisfy this condition
pass
elif disease_id != ('OMIM:' + omim_ids):
# the disease ID is an OMIM ID and
# there is only one non-equiv OMIM entry in omim_ids
# we preferentially use the disease_id here
logger.warning(
"There may be alternate identifier for %s: %s",
disease_id, omim_ids)
# TODO: What should be done with the alternate disease IDs?
else:
if len(omim_id_list) == 1:
# the disease ID is not an OMIM ID
# and there is only one OMIM entry in omim_ids.
preferred_disease_id = 'OMIM:' + omim_ids
elif len(omim_id_list) > 1:
# This is when the disease ID is not an OMIM ID and
# there is more than one OMIM entry in omim_ids.
pass
# we actually want the association between the gene and the disease
# to be via an alternate locus not the "wildtype" gene itself. So we
# make an anonymous alternate locus, and put that in the association.
alt_id = gene_id + '-' + preferred_disease_id + 'VL'
# can't have colons in the bnodes
alt_locus = re.sub(r':', '', alt_id)
alt_locus = "_:" + alt_locus
alt_label = 'some variant of ' + gene_symbol + ' that is ' \
+ direct_evidence + ' for ' + disease_name
model.addIndividualToGraph(
alt_locus, alt_label,
self.geno.genoparts['variant_locus'])
# assume that the label gets added elsewhere
model.addClassToGraph(gene_id, None)
self.geno.addAffectedLocus(alt_locus, gene_id)
model.addBlankNodeAnnotation(alt_locus)
# not sure if MESH is getting added separately.
# adding labels here for good measure
dlabel = None
if re.match(r'MESH', preferred_disease_id):
dlabel = disease_name
model.addClassToGraph(preferred_disease_id, dlabel)
# Add the disease to gene relationship.
rel_id = self._get_relationship_id(direct_evidence)
refs = self._process_pubmed_ids(pubmed_ids)
self._make_association(alt_locus, preferred_disease_id, rel_id, refs)
return
示例6: _process_omim2gene
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addBlankNodeAnnotation [as 别名]
def _process_omim2gene(self, limit=None):
    """
    Map OMIM ids to KEGG gene ids, branching on the link_type column.

    * ``equivalent`` links: both ids are added as classes/genes, and an
      equivalence is asserted when the OMIM id (or a replacement listed
      for it in ``self.omim_replaced``) is typed as a gene in
      ``self.omim_type``.
    * ``reverse`` links: a variant locus of the KEGG gene is created and
      associated with the OMIM id via the "is marker for" relation.
    * ``original`` links are only logged; any other link type is logged
      as a warning.

    Triples created:
    <kegg_gene_id> is a Gene
    <omim_gene_id> is a Gene
    <kegg_gene_id> hasXref <omim_gene_id>
    <assoc_id> has subject <omim_disease_id>
    <assoc_id> has object <kegg_gene_id>

    :param limit: outside test mode, stop once the reader's line number
        exceeds this value; ``None`` means no limit
    :return:
    """
    LOG.info("Processing OMIM to KEGG gene")
    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    geno = Genotype(graph)

    def typed_as_gene(candidate):
        # True when the id is known to omim_type and typed as a gene
        return candidate in self.omim_type and \
            self.omim_type[candidate] == self.globaltt['gene']

    raw = '/'.join((self.rawdir, self.files['omim2gene']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in reader:
            (kegg_gene_id, omim_id, link_type) = row

            if self.test_mode and \
                    kegg_gene_id not in self.test_ids['genes']:
                continue

            kegg_gene_id = 'KEGG-' + kegg_gene_id.strip()
            omim_id = re.sub(r'omim', 'OMIM', omim_id)

            if link_type == 'equivalent':
                # these are genes!
                # so add them as a class then make equivalence
                model.addClassToGraph(omim_id, None)
                geno.addGene(kegg_gene_id, None)

                # previous: if omim type is not disease-ish then use
                # now is: if omim type is gene then use
                if omim_id in self.omim_replaced:
                    # the last gene-typed replacement wins
                    for replacement in self.omim_replaced[omim_id]:
                        if typed_as_gene(replacement):
                            omim_id = replacement
                if typed_as_gene(omim_id):
                    model.addEquivalentClass(kegg_gene_id, omim_id)

            elif link_type == 'reverse':
                # make an association between an OMIM ID & the KEGG gene ID
                # we do this with omim ids because
                # they are more atomic than KEGG ids
                alt_locus_id = self._make_variant_locus_id(
                    kegg_gene_id, omim_id)
                alt_label = self.label_hash[alt_locus_id]
                model.addIndividualToGraph(
                    alt_locus_id, alt_label,
                    self.globaltt['variant_locus'])
                geno.addAffectedLocus(alt_locus_id, kegg_gene_id)
                model.addBlankNodeAnnotation(alt_locus_id)

                # Add the disease to gene relationship.
                rel = self.globaltt['is marker for']
                assoc = G2PAssoc(
                    graph, self.name, alt_locus_id, omim_id, rel)
                assoc.add_association_to_graph()

            elif link_type == 'original':
                # these are sometimes a gene, and sometimes a disease
                LOG.info(
                    'Unable to handle original link for %s-%s',
                    kegg_gene_id, omim_id)

            else:
                # don't know what these are
                LOG.warning(
                    'Unhandled link type for %s-%s: %s',
                    kegg_gene_id, omim_id, link_type)

            if not self.test_mode and limit is not None \
                    and reader.line_num > limit:
                break

    LOG.info("Done with OMIM to KEGG gene")
示例7: _process_kegg_disease2gene
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addBlankNodeAnnotation [as 别名]
def _process_kegg_disease2gene(self, limit=None):
    """
    Associate KEGG diseases with their genes.

    We are being conservative here: only diseases whose id is absent
    from ``self.kegg_disease_hash`` are processed (presumably those
    without an OMIM mapping - confirm against where the hash is built),
    and diseases whose label contains 'includ' are skipped as grouping
    classes.

    Triples created:
    <alternate_locus> is an Individual
    <alternate_locus> has type <variant_locus>
    <alternate_locus> is an allele of <gene_id>
    <assoc_id> has subject <disease_id>
    <assoc_id> has object <gene_id>

    :param limit: outside test mode, stop once the reader's line number
        exceeds this value; ``None`` means no limit
    :return:
    """
    LOG.info("Processing KEGG disease to gene")
    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    geno = Genotype(graph)
    rel = self.globaltt['is marker for']
    noomimset = set()

    raw = '/'.join((self.rawdir, self.files['disease_gene']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in reader:
            (gene_id, disease_id) = row

            if self.test_mode and gene_id not in self.test_ids['genes']:
                continue

            gene_id = 'KEGG-' + gene_id.strip()
            disease_id = 'KEGG-' + disease_id.strip()

            # only add diseases for which
            # there is no omim id and not a grouping class
            # NOTE(review): everything down to the association is kept
            # inside this branch, following the docstring's "only
            # processing those diseases..." wording - confirm.
            if disease_id not in self.kegg_disease_hash:
                # label is None when the disease is not in label_hash
                disease_label = None
                if disease_id in self.label_hash:
                    disease_label = self.label_hash[disease_id]

                # they use 'including' when it's a grouping class
                if re.search(r'includ', str(disease_label)):
                    LOG.info(
                        "Skipping association because it's a grouping class: %s",
                        disease_label)
                    continue

                # type this disease_id as a disease
                model.addClassToGraph(disease_id, disease_label)
                # , class_type=self.globaltt['disease'])
                noomimset.add(disease_id)

                alt_locus_id = self._make_variant_locus_id(
                    gene_id, disease_id)
                alt_label = self.label_hash[alt_locus_id]
                model.addIndividualToGraph(
                    alt_locus_id, alt_label,
                    self.globaltt['variant_locus'])
                geno.addAffectedLocus(alt_locus_id, gene_id)
                model.addBlankNodeAnnotation(alt_locus_id)

                # Add the disease to gene relationship.
                assoc = G2PAssoc(
                    graph, self.name, alt_locus_id, disease_id, rel)
                assoc.add_association_to_graph()

            if not self.test_mode and limit is not None \
                    and reader.line_num > limit:
                break

    LOG.info("Done with KEGG disease to gene")
    LOG.info("Found %d diseases with no omim id", len(noomimset))