本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addIndividualToGraph方法的典型用法代码示例。如果您正苦于以下问题：Python GraphUtils.addIndividualToGraph方法的具体用法？Python GraphUtils.addIndividualToGraph怎么用？Python GraphUtils.addIndividualToGraph使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.utils.GraphUtils.GraphUtils的用法示例。
在下文中一共展示了GraphUtils.addIndividualToGraph方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: addRefToGraph
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def addRefToGraph(self, g):
    """
    Add this reference to the supplied graph.

    The label is the short citation, falling back to the title.
    When ``ref_url`` is set, the title, type and label are written
    directly as triples on that URL; otherwise, when ``ref_id`` is set,
    the reference is added as an individual.  If neither identifier is
    available an error is logged and nothing is added.

    :param g: the graph to add the triples to
    :return: None
    """
    gu = GraphUtils(curie_map.get())

    label = self.short_citation
    if label is None:
        label = self.title

    if self.ref_url is not None:
        ref_uri = URIRef(self.ref_url)
        # Do not build a Literal from a missing value; this mirrors
        # the title guard used by the ref_id branch below.
        if self.title is not None:
            g.add((ref_uri, DC['title'], Literal(self.title)))
        g.add((ref_uri, RDF['type'], gu.getNode(self.ref_type)))
        if label is not None:
            g.add((ref_uri, RDFS['label'], Literal(label)))
    elif self.ref_id is not None:
        gu.addIndividualToGraph(g, self.ref_id, label, self.ref_type)
        if self.title is not None:
            gu.addTitle(g, self.ref_id, self.title)
    else:
        # should never be true
        logger.error("You are missing an identifier for a reference.")

    # TODO what is the property here to add the date?
    # if self.year is not None:
    #     gu.addTriple()
    # if self.author_list is not None:
    #     for a in self.author_list:
    #         gu.addTriple(
    #             g, self.ref_id, self.props['has_author'], a, True)
    return
示例2: _process_collection
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def _process_collection(self, collection_id, label, page):
    """
    Describe a Coriell repository (collection) in both the main graph
    and the test graph, using the data supplied internally.

    Intended triples (as noted by the original author):
        Repository a ERO:collection
            rdf:label Literal(label)
            foaf:page Literal(page)

    :param collection_id: local identifier; it is prefixed with
        'CoriellCollection:' to form the repository id
    :param label: label given to the repository individual
    :param page: page value handed to ``addPage`` for the repository
    :return: None
    """
    # ############# BUILD THE CELL LINE REPOSITORY #############
    for target_graph in (self.graph, self.testgraph):
        # FIXME: How to devise a label for each repository?
        graph_utils = GraphUtils(curie_map.get())
        repository = 'CoriellCollection:' + collection_id
        graph_utils.addIndividualToGraph(
            target_graph, repository, label, self.terms['collection'])
        graph_utils.addPage(target_graph, repository, page)
    return
示例3: _get_process_allelic_variants
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def _get_process_allelic_variants(self, entry, g):
    """
    Add the allelic variants of an OMIM entry to the graph.

    'live' variants are added as alleles (typed as variant loci) of the
    entry, then linked to the publications cited in their text, to any
    dbSNP identifiers, to their ClinVar accessions, and to their OMIM
    page.  Variants whose status contains 'moved' (this covers both
    'moved' and 'removed') are added as deprecated individuals.
    Any other status is logged as an error.

    :param entry: OMIM entry record; nothing is processed when None
    :param g: the graph to add the triples to
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    geno = Genotype(g)
    if entry is None:
        return

    # entry-specific publication mentions for the allelic variants
    publist = {}
    entry_num = entry['mimNumber']

    # process the ref list just to get the pmids
    ref_to_pmid = self._get_pubs(entry, g)

    if 'allelicVariantList' not in entry:
        return

    for al in entry['allelicVariantList']:
        variant = al['allelicVariant']
        al_num = variant['number']
        al_id = 'OMIM:'+str(entry_num)+'.'+str(al_num).zfill(4)
        al_label = None
        al_description = None
        status = variant['status']
        if status == 'live':
            publist[al_id] = set()
            if 'mutations' in variant:
                al_label = variant['mutations']
            if 'text' in variant:
                al_description = variant['text']
                # reference numbers are cited in the text as "{<num>:"
                publist[al_id] = set(
                    re.findall(r'\{(\d+):', al_description))
            geno.addAllele(
                al_id, al_label, geno.genoparts['variant_locus'],
                al_description)
            geno.addAlleleOfGene(
                al_id, 'OMIM:'+str(entry_num),
                geno.object_properties['is_sequence_variant_instance_of'])
            # look up the pubmed id in the list of references
            for r in publist[al_id]:
                pmid = ref_to_pmid[int(r)]
                gu.addTriple(
                    g, pmid, gu.object_properties['is_about'], al_id)
            if 'dbSnps' in variant:
                for dnum in variant['dbSnps'].split(','):
                    did = 'dbSNP:'+dnum.strip()
                    gu.addIndividualToGraph(g, did, None)
                    gu.addEquivalentClass(g, al_id, did)
            if 'clinvarAccessions' in variant:
                # clinvarAccessions are triple-semicolon delimited,
                # each like RCV000020059;;1
                for acc in variant['clinvarAccessions'].split(';;;'):
                    rcv = re.match(r'(RCV\d+);;', acc)
                    if rcv is None:
                        # skip a malformed accession instead of failing
                        # with AttributeError on the missing match
                        logger.warning(
                            'Unparseable ClinVar accession %s for %s',
                            acc, al_id)
                        continue
                    gu.addXref(g, al_id, 'ClinVar:'+rcv.group(1))
            gu.addPage(
                g, al_id,
                "http://omim.org/entry/"+str(entry_num)+"#" +
                str(al_num).zfill(4))
        elif re.search('moved', status):
            # for both 'moved' and 'removed'
            moved_ids = None
            if 'movedTo' in variant:
                moved_ids = ['OMIM:'+variant['movedTo']]
            gu.addDeprecatedIndividual(g, al_id, moved_ids)
        else:
            logger.error('Uncaught alleleic variant status %s', status)
    # end loop allelicVariantList
    return
示例4: _get_gene2pubmed
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def _get_gene2pubmed(self, limit):
    """
    Loop through the gene2pubmed file and add a simple triple to say
    that a given publication is_about a gene.
    Publications are added as NamedIndividuals.

    Rows are skipped when they are comments, when the gene or pubmed
    column is the '-' placeholder, when (in test mode) the gene is not
    in ``self.gene_ids``, or when the taxon is not in ``self.tax_ids``.

    :param limit: outside of test mode, stop once more than this many
        rows have been accepted; None means no limit
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    g = self.testgraph if self.testMode else self.graph
    is_about = gu.getNode(gu.object_properties['is_about'])
    logger.info("Processing Gene records")
    line_counter = 0
    myfile = '/'.join((self.rawdir, self.files['gene2pubmed']['file']))
    logger.info("FILE: %s", myfile)
    with gzip.open(myfile, 'rb') as f:
        for line in f:
            line = line.decode().strip()
            # skip comments
            if line.startswith('#'):
                continue
            (tax_num, gene_num, pubmed_num) = line.split('\t')

            # ##### set filter=None in init if you don't want a filter
            # if self.filter is not None:
            #     if ((self.filter == 'taxids' and
            #          (int(tax_num) not in self.tax_ids)) or
            #         (self.filter == 'geneids' and
            #          (int(gene_num) not in self.gene_ids))):
            #         continue
            # ##### end filter

            # check the placeholder first so int() never sees '-'
            if gene_num == '-' or pubmed_num == '-':
                continue
            if self.testMode and int(gene_num) not in self.gene_ids:
                continue
            if int(tax_num) not in self.tax_ids:
                continue

            line_counter += 1
            gene_id = ':'.join(('NCBIGene', gene_num))
            pubmed_id = ':'.join(('PMID', pubmed_num))

            # add the gene, in case it hasn't before
            gu.addClassToGraph(g, gene_id, None)
            # add the publication as a NamedIndividual
            # TODO add type publication
            gu.addIndividualToGraph(g, pubmed_id, None, None)
            # write to the selected graph (the test graph in test mode),
            # not unconditionally to self.graph
            g.add((gu.getNode(pubmed_id), is_about, gu.getNode(gene_id)))

            if not self.testMode and limit is not None \
                    and line_counter > limit:
                break
    return
示例5: _process_straininfo
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def _process_straininfo(self, limit):
    """
    Read the strain-info CSV file and add each strain to the graph as
    an individual typed with the mouse taxon (NCBITaxon:10090).

    For every strain the short name (if any) is added as a synonym,
    the label is cached in ``self.idlabel_hash``, the vendor stock
    number is linked (same-individual for vendors 'J' and 'Rbrc',
    cross-references otherwise) and the panel is added as a description.

    :param limit: accepted for interface compatibility; it is not used
        by this method
    :return: None
    """
    # line_counter = 0  # TODO unused
    g = self.testgraph if self.testMode else self.graph

    logger.info("Processing measurements ...")
    raw = '/'.join((self.rawdir, self.files['straininfo']['file']))
    tax_id = 'NCBITaxon:10090'
    gu = GraphUtils(curie_map.get())

    with open(raw, 'r') as f:
        reader = csv.reader(f, delimiter=',', quotechar='\"')
        f.readline()  # read the header row; skip
        for row in reader:
            # C57BL/6J,J,000664,,7,IN,225,17,,http://jaxmice.jax.org/strain/000664.html
            (strain_name, vendor, stocknum, panel, mpd_strainid,
             straintype, n_proj, n_snp_datasets, mpdshortname, url) = row

            if self.testMode and \
                    ('MPD:' + str(mpd_strainid)) not in self.test_ids:
                continue

            # create the strain as an instance of the taxon
            strain_id = 'MPD-strain:' + str(mpd_strainid)
            gu.addIndividualToGraph(g, strain_id, strain_name, tax_id)

            short_name = mpdshortname.strip()
            if short_name != '':
                gu.addSynonym(g, strain_id, short_name)
            self.idlabel_hash[strain_id] = strain_name

            # make it equivalent to the vendor+stock
            if stocknum != '':
                equivalent_id = None
                if vendor == 'J':
                    equivalent_id = 'JAX:' + stocknum
                elif vendor == 'Rbrc':
                    # reiken
                    equivalent_id = 'RBRC:' + re.sub(r'RBRC', '', stocknum)

                if equivalent_id is not None:
                    gu.addSameIndividual(g, strain_id, equivalent_id)
                else:
                    if url != '':
                        gu.addXref(g, strain_id, url, True)
                    if vendor != '':
                        gu.addXref(
                            g, strain_id, ':'.join((vendor, stocknum)),
                            True)

            # add the panel information
            if panel != '':
                gu.addDescription(g, strain_id, panel + ' [panel]')

            # TODO make the panels as a resource collection
    return
示例6: addRefToGraph
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def addRefToGraph(self, g):
    """
    Add this reference to the graph as an individual of ``ref_type``.

    The individual is labelled with the short citation when there is
    one, otherwise with the title.  A title, when present, is also
    added through ``addTitle``.

    :param g: the graph to add the reference to
    :return: None
    """
    graph_utils = GraphUtils(curie_map.get())

    label = self.short_citation
    if label is None:
        label = self.title

    graph_utils.addIndividualToGraph(g, self.ref_id, label, self.ref_type)

    has_title = self.title is not None
    if has_title:
        graph_utils.addTitle(g, self.ref_id, self.title)

    # todo what is the property here to add the date?
    # if self.year is not None:
    #     gu.addTriple()
    # if self.author_list is not None:
    #     for a in self.author_list:
    #         gu.addTriple(g, self.ref_id, self.props['has_author'], a, True)
    return
示例7: CTD
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
gene_id = 'NCBIGene:'+gene_id
preferred_disease_id = disease_id
if omim_ids is not None and omim_ids != '':
omim_id_list = re.split('\|', omim_ids)
# If there is only one OMIM ID for the Disease ID or in the omim_ids list,
# use the OMIM ID preferentially over any MeSH ID.
if re.match('OMIM:.*', disease_id):
if len(omim_id_list) > 1:
# the disease ID is an OMIM ID and there is more than one OMIM entry in omim_ids.
# Currently no entries satisfy this condition
pass
elif disease_id != ('OMIM:'+omim_ids):
# the disease ID is an OMIM ID and there is only one non-equiv OMIM entry in omim_ids
# we preferentially use the disease_id here
logger.warn("There may be alternate identifier for %s: %s", disease_id, omim_ids)
# TODO: What should be done with the alternate disease IDs?
else:
if len(omim_id_list) == 1:
# the disease ID is not an OMIM ID and there is only one OMIM entry in omim_ids.
preferred_disease_id = 'OMIM:'+omim_ids
elif len(omim_id_list) > 1:
# This is when the disease ID is not an OMIM ID and there is more than one OMIM entry in omim_ids.
pass
# we actually want the association between the gene and the disease to be via an alternate locus
# not the "wildtype" gene itself.
# so we make an anonymous alternate locus, and put that in the association.
alt_locus = '_'+gene_id+'-'+preferred_disease_id+'VL'
alt_locus = re.sub(':', '', alt_locus) # can't have colons in the bnodes
if self.nobnodes:
alt_locus = ':'+alt_locus
alt_label = 'some variant of '+gene_symbol+' that is '+direct_evidence+' for '+disease_name
self.gu.addIndividualToGraph(self.g, alt_locus, alt_label, self.geno.genoparts['variant_locus'])
self.gu.addClassToGraph(self.g, gene_id, None) # assume that the label gets added elsewhere
self.geno.addAlleleOfGene(alt_locus, gene_id)
# not sure if MESH is getting added separately. adding labels here for good measure
dlabel = None
if re.match('MESH', preferred_disease_id):
dlabel = disease_name
self.gu.addClassToGraph(self.g, preferred_disease_id, dlabel)
# Add the disease to gene relationship.
rel_id = self._get_relationship_id(direct_evidence)
refs = self._process_pubmed_ids(pubmed_ids)
self._make_association(alt_locus, preferred_disease_id, rel_id, refs)
return
def _make_association(self, subject_id, object_id, rel_id, pubmed_ids):
"""
Make a reified association given an array of pubmed identifiers.
Args:
:param subject_id id of the subject of the association (gene/chem)
:param object_id id of the object of the association (disease)
:param rel_id relationship id
:param pubmed_ids an array of pubmed identifiers
Returns:
:return None
"""
# TODO pass in the relevant Assoc class rather than relying on G2P
assoc = G2PAssoc(self.name, subject_id, object_id, rel_id)
示例8: Feature
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
def addFeatureToGraph(
self, graph, add_region=True, region_id=None,
feature_as_class=False):
"""
We make the assumption here that all features are instances.
The features are located on a region,
which begins and ends with faldo:Position
The feature locations leverage the Faldo model,
which has a general structure like:
Triples:
feature_id a feature_type (individual)
faldo:location region_id
region_id a faldo:region
faldo:begin start_position
faldo:end end_position
start_position a
(any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
faldo:position Integer(numeric position)
faldo:reference reference_id
end_position a
(any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
faldo:position Integer(numeric position)
faldo:reference reference_id
:param graph:
:return:
"""
if feature_as_class:
self.gu.addClassToGraph(graph, self.id, self.label, self.type,
self.description)
else:
self.gu.addIndividualToGraph(graph, self.id, self.label, self.type,
self.description)
if self.start is None and self.stop is None:
add_region = False
if add_region:
# create a region that has the begin/end positions
regionchr = re.sub(r'\w+\:_?', '', self.start['reference'])
if region_id is None:
# in case the values are undefined
# if we know only one of the coordinates,
# then we'll add an "unknown" other.
st = sp = 'UN'
strand = None
if self.start is not None and \
self.start['coordinate'] is not None:
st = str(self.start['coordinate'])
strand = self._getStrandStringFromPositionTypes(
self.start['type'])
if self.stop is not None and\
self.stop['coordinate'] is not None:
sp = str(self.stop['coordinate'])
if strand is not None:
strand = self._getStrandStringFromPositionTypes(
self.stop['type'])
# assume that the strand is the same for both start and stop.
# this will need to be fixed in the future
region_items = [regionchr, st, sp]
if strand is not None:
region_items += [strand]
region_id = '-'.join(region_items)
rid = region_id
示例9: _process_phenotype_data
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
# ataxia [MP:0001393] ,hypoactivity [MP:0001402] ...
# mp_ids are now a comma delimited list
# with MP terms in brackets
phenotype_ids = []
if mp_ids != '':
for i in re.split(r',', mp_ids):
i = i.strip()
mps = re.search(r'\[(.*)\]', i)
if mps is not None:
mp_id = mps.group(1).strip()
phenotype_ids.append(mp_id)
# pubmed ids are space delimited
pubmed_ids = []
if pubmed_nums.strip() != '':
for i in re.split(r'\s+', pubmed_nums):
pmid = 'PMID:'+i.strip()
pubmed_ids.append(pmid)
r = Reference(pmid,
Reference.ref_types['journal_article'])
r.addRefToGraph(g)
# https://www.mmrrc.org/catalog/sds.php?mmrrc_id=00001
# is a good example of 4 genotype parts
gu.addClassToGraph(g, mouse_taxon, None)
if research_areas.strip() == '':
research_areas = None
else:
research_areas = 'Research Areas: '+research_areas
strain_type = mouse_taxon
if strain_state == 'ES':
strain_type = stem_cell_class
gu.addIndividualToGraph(
g, strain_id, strain_label, strain_type,
research_areas) # an inst of mouse??
gu.makeLeader(g, strain_id)
# phenotypes are associated with the alleles
for pid in phenotype_ids:
# assume the phenotype label is in the ontology
gu.addClassToGraph(g, pid, None)
if mgi_allele_id is not None and mgi_allele_id != '':
assoc = G2PAssoc(self.name, mgi_allele_id, pid,
gu.object_properties['has_phenotype'])
for p in pubmed_ids:
assoc.add_source(p)
assoc.add_association_to_graph(g)
else:
logger.info("Phenotypes and no allele for %s",
strain_id)
if not self.testMode and (
limit is not None and line_counter > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for v in variants:
示例10: Genotype
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
'hemizygous-y': 'GENO:0000604',
'hemizygous-x': 'GENO:0000605',
'homozygous': 'GENO:0000136',
'hemizygous': 'GENO:0000606',
'complex_heterozygous': 'GENO:0000402',
'simple_heterozygous': 'GENO:0000458'
}
properties = object_properties.copy()
properties.update(annotation_properties)
def __init__(self, graph):
    """
    Keep a handle on the graph and load this class's object
    properties into it.

    :param graph: the graph that the add* methods will write to
    """
    utils = GraphUtils(curie_map.get())
    self.gu = utils
    self.graph = graph
    utils.loadProperties(graph, self.object_properties, utils.OBJPROP)
    return
def addGenotype(self, genotype_id, genotype_label, genotype_type=None,
                genotype_description=None):
    """
    Add a genotype to the graph as an individual.

    When no genotype_type is supplied, it defaults to
    'intrinsic_genotype'.

    :param genotype_id: identifier of the genotype
    :param genotype_label: label of the genotype
    :param genotype_type: type identifier (optional)
    :param genotype_description: description of the genotype (optional)
    :return: None
    """
    resolved_type = genotype_type
    if resolved_type is None:
        resolved_type = self.genoparts['intrinsic_genotype']
    self.gu.addIndividualToGraph(
        self.graph, genotype_id, genotype_label, resolved_type,
        genotype_description)
    return
def addAllele(self, allele_id, allele_label, allele_type=None,
              allele_description=None):
    """
    Make an allele object and add it to the graph as an individual.

    If no allele_type is given, it defaults to the 'allele' entry of
    ``genoparts``.

    :param allele_id: curie for allele (required)
    :param allele_label: label for allele (required)
    :param allele_type: id for an allele type
        (optional, recommended SO or GENO class)
    :param allele_description: a free-text description of the allele
    :return: None
    """
    # TODO should we accept a list of allele types?
    resolved_type = allele_type
    if resolved_type is None:
        # TODO is this a good idea?
        resolved_type = self.genoparts['allele']
    self.gu.addIndividualToGraph(
        self.graph, allele_id, allele_label, resolved_type,
        allele_description)
    return
def addGene(self, gene_id, gene_label, gene_type=None,
            gene_description=None):
    """
    Add a gene to the graph.  Genes are added as classes, not
    individuals.

    If no gene_type is given, it defaults to the 'gene' entry of
    ``genoparts``.

    :param gene_id: identifier of the gene
    :param gene_label: label of the gene
    :param gene_type: type identifier (optional)
    :param gene_description: description of the gene (optional)
    :return: None
    """
    resolved_type = gene_type
    if resolved_type is None:
        resolved_type = self.genoparts['gene']
    # genes are classes
    self.gu.addClassToGraph(
        self.graph, gene_id, gene_label, resolved_type, gene_description)
    return
def addConstruct(self, construct_id, construct_label, construct_type=None, construct_description=None):
# TODO add base type for construct
# if (constrcut_type is None):
# constrcut_type=self.construct_base_type
self.gu.addIndividualToGraph(self.graph, construct_id, construct_label, construct_type, construct_description)
示例11: _process_diseasegene
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def _process_diseasegene(self, limit):
"""
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
line_counter = 0
geno = Genotype(g)
gu = GraphUtils(curie_map.get())
myfile = '/'.join((self.rawdir, self.files['disease-gene']['file']))
# PYLINT complains iterparse deprecated,
# but as of py 3.4 only the optional & unsupplied parse arg is.
for event, elem in ET.iterparse(myfile):
if elem.tag == 'Disorder':
# get the element name and id, ignoreS element name
# id = elem.get('id') # some internal identifier
disorder_num = elem.find('OrphaNumber').text
disorder_id = 'Orphanet:'+str(disorder_num)
if self.testMode and \
disorder_id not in \
config.get_config()['test_ids']['disease']:
continue
disorder_label = elem.find('Name').text
# make a hash of internal gene id to type for later lookup
gene_iid_to_type = {}
gene_list = elem.find('GeneList')
for gene in gene_list.findall('Gene'):
gene_iid = gene.get('id')
gene_type = gene.find('GeneType').get('id')
gene_iid_to_type[gene_iid] = gene_type
# assuming that these are in the ontology
gu.addClassToGraph(g, disorder_id, disorder_label)
assoc_list = elem.find('DisorderGeneAssociationList')
for a in assoc_list.findall('DisorderGeneAssociation'):
gene_iid = a.find('.//Gene').get('id')
gene_name = a.find('.//Gene/Name').text
gene_symbol = a.find('.//Gene/Symbol').text
gene_num = a.find('./Gene/OrphaNumber').text
gene_id = 'Orphanet:'+str(gene_num)
gene_type_id = \
self._map_gene_type_id(gene_iid_to_type[gene_iid])
gu.addClassToGraph(
g, gene_id, gene_symbol, gene_type_id, gene_name)
syn_list = a.find('./Gene/SynonymList')
if int(syn_list.get('count')) > 0:
for s in syn_list.findall('./Synonym'):
gu.addSynonym(g, gene_id, s.text)
dgtype = a.find('DisorderGeneAssociationType').get('id')
rel_id = self._map_rel_id(dgtype)
dg_label = \
a.find('./DisorderGeneAssociationType/Name').text
if rel_id is None:
logger.warning(
"Cannot map association type (%s) to RO " +
"for association (%s | %s). Skipping.",
dg_label, disorder_label, gene_symbol)
continue
alt_locus_id = '_'+gene_num+'-'+disorder_num+'VL'
alt_label = \
' '.join(('some variant of', gene_symbol.strip(),
'that is a', dg_label.lower(),
disorder_label))
if self.nobnodes:
alt_locus_id = ':'+alt_locus_id
gu.addIndividualToGraph(g, alt_locus_id, alt_label,
geno.genoparts['variant_locus'])
geno.addAlleleOfGene(alt_locus_id, gene_id)
# consider typing the gain/loss-of-function variants like:
# http://sequenceontology.org/browser/current_svn/term/SO:0002054
# http://sequenceontology.org/browser/current_svn/term/SO:0002053
# use "assessed" status to issue an evidence code
# FIXME I think that these codes are sub-optimal
status_code = \
a.find('DisorderGeneAssociationStatus').get('id')
# imported automatically asserted information
# used in automatic assertion
eco_id = 'ECO:0000323'
# Assessed
# TODO are these internal ids stable between releases?
if status_code == '17991':
# imported manually asserted information
# used in automatic assertion
eco_id = 'ECO:0000322'
# Non-traceable author statement ECO_0000034
# imported information in automatic assertion ECO_0000313
#.........这里部分代码省略.........
示例12: _process_omim2gene
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def _process_omim2gene(self, limit=None):
    """
    Map OMIM IDs to KEGG gene IDs, split on the link_type field.

    'equivalent' links: both ids are added as genes and made
        equivalent classes.
    'reverse' links: a variant locus of the KEGG gene is created and
        associated with the OMIM id (disease to gene association).
    'original' links are currently skipped (logged at info level).
    Any other link type is logged as a warning.

    Triples created (as noted by the original author):
        <kegg_gene_id> is a Gene
        <omim_gene_id> is a Gene
        <kegg_gene_id> hasXref <omim_gene_id>
        <assoc_id> has subject <omim_disease_id>
        <assoc_id> has object <kegg_gene_id>

    :param limit: outside of test mode, stop once more than this many
        rows have been read; None means no limit
    :return: None
    """
    logger.info("Processing OMIM to KEGG gene")
    g = self.testgraph if self.testMode else self.graph
    line_counter = 0
    geno = Genotype(g)
    gu = GraphUtils(curie_map.get())
    raw = '/'.join((self.rawdir, self.files['omim2gene']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            (kegg_gene_id, omim_id, link_type) = row

            if self.testMode and \
                    kegg_gene_id not in self.test_ids['genes']:
                continue

            kegg_gene_id = 'KEGG-'+kegg_gene_id.strip()
            omim_id = re.sub('omim', 'OMIM', omim_id)
            if link_type == 'equivalent':
                # these are genes!
                # so add them as a class then make equivalence
                gu.addClassToGraph(g, omim_id, None)
                geno.addGene(kegg_gene_id, None)
                gu.addEquivalentClass(g, kegg_gene_id, omim_id)
            elif link_type == 'reverse':
                # make an association between an OMIM ID and the KEGG
                # gene ID; we do this with omim ids because they are
                # more atomic than KEGG ids
                alt_locus_id = self._make_variant_locus_id(
                    kegg_gene_id, omim_id)
                alt_label = self.label_hash[alt_locus_id]
                gu.addIndividualToGraph(
                    g, alt_locus_id, alt_label,
                    geno.genoparts['variant_locus'])
                geno.addAlleleOfGene(alt_locus_id, kegg_gene_id)

                # Add the disease to gene relationship.
                rel = gu.object_properties['is_marker_for']
                assoc = G2PAssoc(self.name, alt_locus_id, omim_id, rel)
                assoc.add_association_to_graph(g)
            elif link_type == 'original':
                # these are sometimes a gene, and sometimes a disease
                logger.info(
                    'Unable to handle original link for %s-%s',
                    kegg_gene_id, omim_id)
            else:
                # don't know what these are
                # (Logger.warn is deprecated; use Logger.warning)
                logger.warning(
                    'Unhandled link type for %s-%s: %s',
                    kegg_gene_id, omim_id, link_type)

            if (not self.testMode) and \
                    (limit is not None and line_counter > limit):
                break

    logger.info("Done with OMIM to KEGG gene")
    # NOTE(review): other code in this file passes gu.OBJPROP /
    # gu.DATAPROP / gu.ANNOTPROP here -- confirm these G2PAssoc
    # constants exist and carry the same values.
    gu.loadProperties(
        g, G2PAssoc.annotation_properties, G2PAssoc.ANNOTPROP)
    gu.loadProperties(g, G2PAssoc.datatype_properties, G2PAssoc.DATAPROP)
    gu.loadProperties(g, G2PAssoc.object_properties, G2PAssoc.OBJECTPROP)
    return
示例13: Decipher
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
r = Reference(
pmid, Reference.ref_types['journal_article'])
r.addRefToGraph(g)
assoc.add_source(pmid)
assoc.add_association_to_graph(g)
else:
# these are unmapped to a disease id.
# note that some match OMIM disease labels
# but the identifiers are just not included.
# TODO consider mapping to OMIM or DOIDs in other ways
logger.warning(
"No omim id on line %d\n%s", line_counter, str(row))
unmapped_omim_counter += 1
# TODO hpo phenotypes
# since the DDG2P file is not documented,
# I don't know what the HPO annotations are actually about
# are they about the gene? the omim disease? something else?
# So, we wont create associations until this is clarified
if not self.testMode and limit is not None \
and line_counter > limit:
break
myzip.close()
logger.warning(
"gene-disorder associations with no omim id: %d",
unmapped_omim_counter)
logger.warning("unmapped gene count: %d", unmapped_gene_count)
gu.loadProperties(g, G2PAssoc.object_properties, gu.OBJPROP)
gu.loadProperties(g, G2PAssoc.datatype_properties, gu.DATAPROP)
gu.loadProperties(g, G2PAssoc.annotation_properties, gu.ANNOTPROP)
return
def make_allele_by_consequence(self, consequence, gene_id, gene_symbol):
    """
    Given a "consequence" label that describes a variation type,
    create an anonymous variant of the specified gene as an instance
    of that consequence type.

    A consequence that has no mapping is logged and typed with the
    generic SO:0001060.

    :param consequence: consequence label (see the table below)
    :param gene_id: identifier of the gene the allele belongs to
    :param gene_symbol: gene symbol, used in the allele label
    :return: allele_id
    """
    # Loss of function : Nonsense, frame-shifting indel,
    #   essential splice site mutation, whole gene deletion or any other
    #   mutation where functional analysis demonstrates clear reduction
    #   or loss of function
    # All missense/in frame : Where all the mutations described in the
    #   data source are either missense or in frame deletions and there
    #   is no evidence favoring either loss-of-function, activating or
    #   dominant negative effect
    # Dominant negative : Mutation within one allele of a gene that
    #   creates a significantly greater deleterious effect on gene
    #   product function than a monoallelic loss of function mutation
    # Activating : Mutation, usually missense that results in
    #   a constitutive functional activation of the gene product
    # Increased gene dosage : Copy number variation that increases
    #   the functional dosage of the gene
    # Cis-regulatory or promotor mutation : Mutation in cis-regulatory
    #   elements that lies outwith the known transcription unit and
    #   promotor of the controlled gene
    # Uncertain : Where the exact nature of the mutation is unclear or
    #   not recorded
    so_type = {  # type of variant
        'Loss of function': 'SO:0002054',  # loss of function
        'All missense/in frame': 'SO:0001583',  # missense
        'Dominant negative': 'SO:0002052',  # dominant negative
        'Activating': 'SO:0002053',  # gain of function
        'Increased gene dosage': 'SO:0001742',  # copy number gain
        # regulatory region
        'Cis-regulatory or promotor mutation': 'SO:0001566',
        'Uncertain': 'SO:0001060',  # generic sequence
        '5 or 3UTR mutation': 'SO:0001622',  # UTR
    }

    type_id = so_type.get(consequence)
    if type_id is None:
        logger.warning("Consequence type unmapped: %s", str(consequence))
        type_id = 'SO:0001060'  # sequence variant

    # make the allele: colon-free gene+type, underscore-prefixed (BNode)
    allele_id = '_' + ''.join((gene_id, type_id)).replace(':', '')
    if self.nobnodes:
        allele_id = ':' + allele_id

    allele_label = ' '.join((consequence, 'allele in', gene_symbol))
    self.gu.addIndividualToGraph(self.g, allele_id, allele_label, type_id)
    self.geno.addAlleleOfGene(allele_id, gene_id)
    return allele_id
示例14: Environment
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
class Environment:
    """
    Convenience methods to add items related to an experimental
    environment and its parts to a supplied graph.

    This is a stub ready for expansion.
    """

    # special environment parts mapped to the ontology classes
    # that we explicitly reference here
    environment_parts = {
        'environmental_system': 'ENVO:01000254',
        'environmental_condition': 'XCO:0000000',
        'morpholio_reagent': 'REO:0000042',
        'talen_reagent': 'REO:0001022',
        'crispr_reagent': 'REO:crispr_TBD'
    }

    object_properties = {
        'has_part': 'BFO:0000051',
    }

    annotation_properties = {
    }

    # union of the object and annotation properties
    properties = dict(object_properties)
    properties.update(annotation_properties)

    def __init__(self, graph):
        """
        Keep a handle on the graph and load this class's object
        properties into it.

        :param graph: the graph that the add* methods will write to
        """
        utils = GraphUtils(curie_map.get())
        self.gu = utils
        self.graph = graph
        utils.loadProperties(graph, self.object_properties, utils.OBJPROP)

    def addEnvironment(
            self, env_id, env_label, env_type=None, env_description=None):
        """
        Add an environment to the graph as an individual.

        :param env_id: identifier of the environment
        :param env_label: label of the environment
        :param env_type: type identifier; defaults to the
            'environmental_system' entry of ``environment_parts``
        :param env_description: description of the environment
        :return: None
        """
        resolved_type = env_type
        if resolved_type is None:
            resolved_type = self.environment_parts['environmental_system']
        self.gu.addIndividualToGraph(
            self.graph, env_id, env_label, resolved_type, env_description)

    def addEnvironmentalCondition(
            self, cond_id, cond_label, cond_type=None,
            cond_description=None):
        """
        Add an environmental condition to the graph as an individual.

        :param cond_id: identifier of the condition
        :param cond_label: label of the condition
        :param cond_type: type identifier; defaults to the
            'environmental_condition' entry of ``environment_parts``
        :param cond_description: description of the condition
        :return: None
        """
        resolved_type = cond_type
        if resolved_type is None:
            resolved_type = \
                self.environment_parts['environmental_condition']
        self.gu.addIndividualToGraph(
            self.graph, cond_id, cond_label, resolved_type,
            cond_description)

    def addComponentToEnvironment(self, env_id, component_id):
        """
        Link a component to an environment with the has_part property.

        :param env_id: identifier of the environment (subject)
        :param component_id: identifier of the component (object)
        :return: None
        """
        # TODO check whether this should come from self rather than gu
        has_part = self.gu.object_properties['has_part']
        self.gu.addTriple(self.graph, env_id, has_part, component_id)

    def addComponentAttributes(
            self, component_id, entity_id, value=None, unit=None):
        """
        Link an entity to a component with the has_part property.

        :param component_id: identifier of the component (subject)
        :param entity_id: identifier of the entity (object)
        :param value: currently ignored
        :param unit: currently ignored
        :return: None
        """
        has_part = self.gu.object_properties['has_part']
        self.gu.addTriple(self.graph, component_id, has_part, entity_id)
        # TODO add value and units
示例15: _process_QTLs_genetic_location
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addIndividualToGraph [as 别名]
def _process_QTLs_genetic_location(self, raw, taxon_id, common_name, limit=None):
"""
This function processes
Triples created:
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
line_counter = 0
geno = Genotype(g)
gu = GraphUtils(curie_map.get())
eco_id = "ECO:0000061" # Quantitative Trait Analysis Evidence
logger.info("Processing genetic location for %s", taxon_id)
with open(raw, 'r', encoding="iso-8859-1") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
(qtl_id, qtl_symbol, trait_name, assotype, empty, chromosome, position_cm, range_cm,
flankmark_a2, flankmark_a1, peak_mark, flankmark_b1, flankmark_b2, exp_id, model, test_base,
sig_level, lod_score, ls_mean, p_values, f_statistics, variance, bayes_value, likelihood_ratio,
trait_id, dom_effect, add_effect, pubmed_id, gene_id, gene_id_src, gene_id_type, empty2) = row
if self.testMode and int(qtl_id) not in self.test_ids:
continue
qtl_id = 'AQTL:'+qtl_id
trait_id = 'AQTLTrait:'+trait_id
# Add QTL to graph
f = Feature(qtl_id, qtl_symbol, geno.genoparts['QTL'])
f.addTaxonToFeature(g, taxon_id)
# deal with the chromosome
chrom_id = makeChromID(chromosome, taxon_id, 'CHR')
# add a version of the chromosome which is defined as the genetic map
build_id = 'MONARCH:'+common_name.strip()+'-linkage'
build_label = common_name+' genetic map'
geno.addReferenceGenome(build_id, build_label, taxon_id)
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
start = stop = None
if re.search('-', range_cm):
range_parts = re.split('-', range_cm)
# check for poorly formed ranges
if len(range_parts) == 2 and range_parts[0] != '' and range_parts[1] != '':
(start, stop) = [int(float(x.strip())) for x in re.split('-', range_cm)]
else:
logger.info("There's a cM range we can't handle for QTL %s: %s", qtl_id, range_cm)
elif position_cm != '':
start = stop = int(float(position_cm))
# FIXME remove converion to int for start/stop when schema can handle floats
# add in the genetic location based on the range
f.addFeatureStartLocation(start, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureEndLocation(stop, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureToGraph(g)
# sometimes there's a peak marker, like a rsid. we want to add that as a variant of the gene,
# and xref it to the qtl.
dbsnp_id = None
if peak_mark != '' and peak_mark != '.' and re.match('rs', peak_mark.strip()):
dbsnp_id = 'dbSNP:'+peak_mark.strip()
gu.addIndividualToGraph(g, dbsnp_id, None, geno.genoparts['sequence_alteration'])
gu.addXref(g, qtl_id, dbsnp_id)
if gene_id is not None and gene_id != '' and gene_id != '.':
if gene_id_src == 'NCBIgene' or gene_id_src == '': # we assume if no src is provided, it's NCBI
gene_id = 'NCBIGene:'+gene_id.strip()
geno.addGene(gene_id, None) # we will expect that these labels provided elsewhere
geno.addAlleleOfGene(qtl_id, gene_id, geno.object_properties['feature_to_gene_relation']) # FIXME what is the right relationship here?
if dbsnp_id is not None:
# add the rsid as a seq alt of the gene_id
vl_id = '_' + re.sub(':', '', gene_id) + '-' + peak_mark
if self.nobnodes:
vl_id = ':' + vl_id
geno.addSequenceAlterationToVariantLocus(dbsnp_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
# add the trait
gu.addClassToGraph(g, trait_id, trait_name)
# Add publication
r = None
if re.match('ISU.*', pubmed_id):
pub_id = 'AQTLPub:'+pubmed_id.strip()
r = Reference(pub_id)
elif pubmed_id != '':
pub_id = 'PMID:'+pubmed_id.strip()
r = Reference(pub_id, Reference.ref_types['journal_article'])
if r is not None:
#.........这里部分代码省略.........