本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addXref方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.addXref方法的具体用法?Python GraphUtils.addXref怎么用?Python GraphUtils.addXref使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.utils.GraphUtils.GraphUtils的用法示例。
在下文中一共展示了GraphUtils.addXref方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_process_allelic_variants
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
def _get_process_allelic_variants(self, entry, g):
    """
    Add the allelic variants of an OMIM entry to the graph.

    For every item of ``entry['allelicVariantList']``:

    * status ``live``: the variant is added as an allele (variant locus) of
      the entry; every reference number cited in its text as ``{N:`` is
      looked up in the entry's publications and linked with ``is_about``;
      dbSNP ids are added as individuals and equivalents; ClinVar RCV
      accessions are added as xrefs; a page link to the OMIM anchor is added.
    * status containing ``moved`` (covers ``moved`` and ``removed``): the
      variant id is deprecated, replaced by ``movedTo`` when that key exists.
    * any other status is logged as an error.

    :param entry: OMIM entry dict, or None (then nothing is added)
    :param g: graph the triples are added to
    :return: None
    """
    gu = GraphUtils(curie_map.get())
    geno = Genotype(g)
    if entry is None:
        return

    entry_num = entry['mimNumber']
    # process the ref list just to get the pmids
    ref_to_pmid = self._get_pubs(entry, g)

    for al in entry.get('allelicVariantList', []):
        variant = al['allelicVariant']
        al_num = variant['number']
        al_id = 'OMIM:'+str(entry_num)+'.'+str(al_num).zfill(4)
        status = variant['status']

        if status == 'live':
            al_label = variant.get('mutations')
            al_description = variant.get('text')
            # reference numbers mentioned in the variant text, e.g. "{3:"
            cited_refs = set()
            if al_description is not None:
                cited_refs = set(re.findall(r'\{(\d+):', al_description))

            geno.addAllele(al_id, al_label,
                           geno.genoparts['variant_locus'], al_description)
            geno.addAlleleOfGene(
                al_id, 'OMIM:'+str(entry_num),
                geno.object_properties['is_sequence_variant_instance_of'])

            # look up the pubmed id in the list of references
            for ref_num in cited_refs:
                pmid = ref_to_pmid[int(ref_num)]
                gu.addTriple(g, pmid, gu.object_properties['is_about'], al_id)

            if 'dbSnps' in variant:
                for dnum in variant['dbSnps'].split(','):
                    did = 'dbSNP:'+dnum.strip()
                    gu.addIndividualToGraph(g, did, None)
                    gu.addEquivalentClass(g, al_id, did)

            if 'clinvarAccessions' in variant:
                # triple-semicolon delimited; each item is like
                # RCV000020059;;1
                for item in variant['clinvarAccessions'].split(';;;'):
                    rcv = re.match(r'(RCV\d+);;', item)
                    if rcv is None:
                        # a malformed item must not abort the whole entry
                        logger.warning(
                            'Unparseable ClinVar accession %s for %s',
                            item, al_id)
                        continue
                    gu.addXref(g, al_id, 'ClinVar:'+rcv.group(1))

            gu.addPage(
                g, al_id,
                "http://omim.org/entry/"+str(entry_num)+"#"+str(al_num).zfill(4))
        elif re.search('moved', status):
            # for both 'moved' and 'removed'
            moved_ids = None
            if 'movedTo' in variant:
                moved_ids = ['OMIM:'+variant['movedTo']]
            gu.addDeprecatedIndividual(g, al_id, moved_ids)
        else:
            logger.error('Uncaught alleleic variant status %s', status)
    # end loop allelicVariantList
    return
示例2: _process_straininfo
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
def _process_straininfo(self, limit):
    """
    Add every strain listed in the ``straininfo`` file to the graph.

    Each row becomes an individual ``MPD-strain:<mpd_strainid>`` typed with
    NCBITaxon:10090 and labelled with the strain name; the label is also
    cached in ``self.idlabel_hash``.  A non-blank short name is added as a
    synonym.  When a stock number is present the strain is declared the same
    individual as the JAX (vendor ``J``) or RBRC (vendor ``Rbrc``) stock;
    for any other vendor the url and ``vendor:stocknum`` are added as xrefs.
    A non-empty panel is recorded as a description.

    In test mode only rows whose ``MPD:<mpd_strainid>`` is in
    ``self.test_ids`` are processed.

    :param limit: accepted for signature compatibility; not used here
    :return: None
    :raises ValueError: if a row does not have exactly ten columns
    """
    if self.testMode:
        g = self.testgraph
    else:
        g = self.graph

    logger.info("Processing measurements ...")
    raw = '/'.join((self.rawdir, self.files['straininfo']['file']))
    tax_id = 'NCBITaxon:10090'
    gu = GraphUtils(curie_map.get())

    # newline='' lets the csv module do its own line-ending handling
    with open(raw, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='\"')
        # skip the header through the reader itself so that the file
        # position and the csv parser never get out of step
        next(reader, None)
        for row in reader:
            (strain_name, vendor, stocknum, panel, mpd_strainid,
             straintype, n_proj, n_snp_datasets, mpdshortname, url) = row
            # C57BL/6J,J,000664,,7,IN,225,17,,http://jaxmice.jax.org/strain/000664.html
            if self.testMode and \
                    'MPD:'+str(mpd_strainid) not in self.test_ids:
                continue

            # create the strain as an instance of the taxon
            strain_id = 'MPD-strain:'+str(mpd_strainid)
            gu.addIndividualToGraph(g, strain_id, strain_name, tax_id)
            short_name = mpdshortname.strip()
            if short_name != '':
                gu.addSynonym(g, strain_id, short_name)
            self.idlabel_hash[strain_id] = strain_name

            # make it equivalent to the vendor+stock
            if stocknum != '':
                if vendor == 'J':
                    gu.addSameIndividual(g, strain_id, 'JAX:'+stocknum)
                elif vendor == 'Rbrc':
                    # RIKEN BRC: drop the prefix already embedded in the
                    # stock number before adding our own
                    riken_id = 'RBRC:'+stocknum.replace('RBRC', '')
                    gu.addSameIndividual(g, strain_id, riken_id)
                else:
                    # NOTE(review): the trailing True is passed straight to
                    # GraphUtils.addXref; its meaning is not visible here
                    if url != '':
                        gu.addXref(g, strain_id, url, True)
                    if vendor != '':
                        gu.addXref(
                            g, strain_id, ':'.join((vendor, stocknum)),
                            True)

            # add the panel information
            if panel != '':
                gu.addDescription(g, strain_id, panel+' [panel]')
            # TODO make the panels as a resource collection
    return
示例3: _get_mappedids
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
def _get_mappedids(self, entry, g):
    """
    Add the external identifier mappings of an OMIM entry to the graph.

    Reads ``entry['externalLinks']`` (nothing is added when it is absent):

    * ``orphanetDiseases``: each Orphanet disease is added as a labelled
      class and as an xref of the OMIM id;
    * ``umlsIDs``: each UMLS id is added as a class and as an xref of the
      OMIM id;
    * ``geneIDs``: each id is added as an equivalent ``NCBIGene:`` class,
      but only when ``self._get_omimtype(entry)`` reports a gene.

    Ids are stripped of surrounding whitespace and blank ids are skipped so
    that no malformed CURIE (``'UMLS: C123'``, ``'UMLS:'``) reaches the graph.

    :param entry: OMIM entry dict; must contain ``mimNumber``
    :param g: graph the triples are added to
    :return: None
    :raises ValueError: if an Orphanet item has fewer than three
        ``;;``-separated fields
    """
    gu = GraphUtils(curie_map.get())
    omimid = 'OMIM:'+str(entry['mimNumber'])
    if 'externalLinks' not in entry:
        return
    links = entry['externalLinks']

    if 'orphanetDiseases' in links:
        # triple semi-colon delimited list of double semi-colon delimited
        # orphanet ID/disease pairs, e.g.
        # 2970;;566;;Prune belly syndrome
        for item in links['orphanetDiseases'].split(';;;'):
            # maxsplit=2 keeps a label that itself contains ';;' intact
            (orpha_num, _internal_num, orpha_label) = item.split(';;', 2)
            orpha_id = 'Orphanet:'+orpha_num.strip()
            gu.addClassToGraph(g, orpha_id, orpha_label.strip())
            gu.addXref(g, omimid, orpha_id)

    if 'umlsIDs' in links:
        for umls_num in links['umlsIDs'].split(','):
            umls_num = umls_num.strip()
            if not umls_num:
                continue
            umls_id = 'UMLS:'+umls_num
            gu.addClassToGraph(g, umls_id, None)
            gu.addXref(g, omimid, umls_id)

    if self._get_omimtype(entry) == Genotype.genoparts['gene'] \
            and 'geneIDs' in links:
        for gene_num in links['geneIDs'].split(','):
            gene_num = gene_num.strip()
            if not gene_num:
                continue
            gu.addEquivalentClass(g, omimid, 'NCBIGene:'+gene_num)
    return
示例4: _process_ortholog_classes
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
def _process_ortholog_classes(self, limit=None):
    """
    Add the KEGG orthology classes to the graph.

    The class name column is split on ``;`` and ``,``.  The first piece is
    used as the class label; when there is more than one piece, the
    remaining pieces are added as synonyms and the last piece is also added
    as the description.  Enzyme commission numbers embedded in that
    description are added as xrefs.

    Triples created:
    <orthology_class_id> is a class
    <orthology_class_id> has label <orthology_symbols>
    <orthology_class_id> has synonym <other orthology_symbols>
    <orthology_class_id> has description <orthology_description>
    <orthology_class_id> has xref <EC number>

    :param limit: outside of test mode, stop once more than this many rows
        have been read; None means no limit
    :return: None
    :raises ValueError: if a row does not have exactly two columns
    """
    logger.info("Processing ortholog classes")
    if self.testMode:
        g = self.testgraph
    else:
        g = self.graph
    line_counter = 0
    gu = GraphUtils(curie_map.get())
    raw = '/'.join((self.rawdir, self.files['ortholog_classes']['file']))
    # Four dot-separated parts; any part after the first may be a dash
    # (EC:1.10.3.-).  Anchoring on the dots avoids false hits on plain
    # digit runs such as "1402-" that a looser token-count pattern accepts.
    ec_pattern = re.compile(r'\d+(?:\.(?:\d+|-)){3}')

    with open(raw, 'r', encoding="iso-8859-1", newline='') as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            (orthology_class_id, orthology_class_name) = row
            if self.testMode and \
                    orthology_class_id not in \
                    self.test_ids['orthology_classes']:
                continue

            # The orthology class is essentially a KEGG gene ID
            # that is species agnostic.
            # Add the ID and label as a gene family class
            other_labels = re.split(r'[;,]', orthology_class_name)
            # the first one is the label we'll use
            orthology_label = other_labels[0]
            orthology_class_id = 'KEGG-'+orthology_class_id.strip()
            orthology_type = OrthologyAssoc.terms['gene_family']
            gu.addClassToGraph(g, orthology_class_id, orthology_label,
                               orthology_type)

            if len(other_labels) > 1:
                # add the rest as synonyms; the first piece is already the
                # label, so it is not repeated as a synonym
                for synonym in other_labels[1:]:
                    gu.addSynonym(g, orthology_class_id, synonym.strip())

                # add the last one as the description
                description = other_labels[-1]
                gu.addDescription(g, orthology_class_id, description)

                # add the enzyme commission number (EC:1.2.99.5) as an xref
                # sometimes there's two, like [EC:1.3.5.1 1.3.5.4]
                for ec_num in ec_pattern.findall(description):
                    gu.addXref(g, orthology_class_id, 'EC:'+ec_num)

            if not self.testMode and \
                    limit is not None and line_counter > limit:
                break

    logger.info("Done with ortholog classes")
    return
示例5: OMIA
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
#.........这里部分代码省略.........
aid = assoc.get_association_id()
breed_label = self.label_hash.get(breed_id)
if breed_label is None:
breed_label = "this breed"
m = re.search(r'\((.*)\)', breed_label)
if m:
sp_label = m.group(1)
else:
sp_label = ''
phene_label = self.label_hash.get(phene_id)
if phene_label is None:
phene_label = "phenotype"
elif phene_label.endswith(sp_label):
# some of the labels we made already include the species;
# remove it to make a cleaner desc
phene_label = re.sub(r' in '+sp_label, '', phene_label)
desc = ' '.join(
("High incidence of", phene_label, "in", breed_label,
"suggests it to be a model of disease", i + "."))
self.gu.addDescription(self.g, aid, desc)
return
def _process_lida_links_row(self, row):
# lidaurl, omia_id, added_by
omia_id = 'OMIA:'+row['omia_id']
lidaurl = row['lidaurl']
if self.testMode and omia_id not in self.test_ids['disease']:
return
self.gu.addXref(self.g, omia_id, lidaurl, True)
return
def _process_phene_gene_row(self, row):
gene_id = self.id_hash['gene'].get(row['gene_id'])
phene_id = self.id_hash['phene'].get(row['phene_id'])
omia_id = self._get_omia_id_from_phene_id(phene_id)
if self.testMode and not (
omia_id in self.test_ids['disease'] and
row['gene_id'] in self.test_ids['gene']) or\
gene_id is None or phene_id is None:
return
# occasionally some phenes are missing! (ex: 406)
if phene_id is None:
logger.warning("Phene id %s is missing", str(row['phene_id']))
return
gene_label = self.label_hash[gene_id]
# some variant of gene_id has phenotype d
vl = '_'+re.sub(r'NCBIGene:', '', str(gene_id)) + 'VL'
if self.nobnodes:
vl = ':'+vl
self.geno.addAllele(vl, 'some variant of ' + gene_label)
self.geno.addAlleleOfGene(vl, gene_id)
assoc = G2PAssoc(self.name, vl, phene_id)
assoc.add_association_to_graph(self.g)
# add the gene id to the set of annotated genes
示例6: _get_variants
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
#.........这里部分代码省略.........
if stop != '-' and stop.strip() != '':
f.addFeatureEndLocation(stop, chrinbuild_id)
f.addFeatureToGraph(g)
if bandinbuild_id is not None:
f.addSubsequenceOfFeature(g, bandinbuild_id)
# CHECK - this makes the assumption that there is only one affected chromosome per variant
# what happens with chromosomal rearrangement variants? shouldn't both chromosomes be here?
# add the hgvs as synonyms
if hgvs_c != '-' and hgvs_c.strip() != '':
gu.addSynonym(g, seqalt_id, hgvs_c)
if hgvs_p != '-' and hgvs_p.strip() != '':
gu.addSynonym(g, seqalt_id, hgvs_p)
# add the dbsnp and dbvar ids as equivalent
if dbsnp_num != '-' and int(dbsnp_num) != -1:
dbsnp_id = 'dbSNP:rs'+str(dbsnp_num)
gu.addIndividualToGraph(g, dbsnp_id, None)
gu.addSameIndividual(g, seqalt_id, dbsnp_id)
if dbvar_num != '-':
dbvar_id = 'dbVar:'+dbvar_num
gu.addIndividualToGraph(g, dbvar_id, None)
gu.addSameIndividual(g, seqalt_id, dbvar_id)
# TODO - not sure if this is right... add as xref?
# the rcv is like the combo of the phenotype with the variant
if rcv_nums != '-':
for rcv_num in re.split(';',rcv_nums):
rcv_id = 'ClinVar:'+rcv_num
gu.addIndividualToGraph(g, rcv_id, None)
gu.addXref(g, seqalt_id, rcv_id)
if gene_id is not None:
# add the gene
gu.addClassToGraph(g, gene_id, gene_symbol)
# make a variant locus
vl_id = '_'+gene_num+'-'+variant_num
if self.nobnodes:
vl_id = ':'+vl_id
vl_label = allele_name
gu.addIndividualToGraph(g, vl_id, vl_label, geno.genoparts['variant_locus'])
geno.addSequenceAlterationToVariantLocus(seqalt_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
else:
# some basic reporting
gmatch = re.search('\(\w+\)', allele_name)
if gmatch is not None and len(gmatch.groups()) > 0:
logger.info("Gene found in allele label, but no id provided: %s", gmatch.group(1))
elif re.match('more than 10', gene_symbol):
logger.info("More than 10 genes found; need to process XML to fetch (variant=%d)", int(variant_num))
else:
logger.info("No gene listed for variant %d", int(variant_num))
# parse the list of "phenotypes" which are diseases. add them as an association
# ;GeneReviews:NBK1440,MedGen:C0392514,OMIM:235200,SNOMED CT:35400008;MedGen:C3280096,OMIM:614193;MedGen:CN034317,OMIM:612635;MedGen:CN169374
# the list is both semicolon delimited and comma delimited, but i don't know why!
# some are bad, like: Orphanet:ORPHA ORPHA319705,SNOMED CT:49049000
if phenotype_ids != '-':
for p in pheno_list:
m = re.match("(Orphanet:ORPHA(?:\s*ORPHA)?)", p)
if m is not None and len(m.groups()) > 0:
p = re.sub(m.group(1), 'Orphanet:', p.strip())
elif re.match('SNOMED CT', p):
示例7: _process_trait_mappings
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
def _process_trait_mappings(self, raw, limit=None):
    """
    Add the trait classes of a trait-mapping csv file to the graph.

    The fourth column of each row carries both the trait id and its label:
    brackets are removed, everything from the closing bracket on is dropped
    and ``ATO #`` is replaced by ``AQTLTrait:`` to build the id; the text
    after the last ``]`` is the label.
    (Presumably the column looks like ``[ATO #123] some label`` - confirm
    against the data file.)

    Triples created:
    <ato_id> is a class with label <ato_label>
    <ato_id> equivalent class <vto_id>   (when the column starts with VT:)
    <ato_id> equivalent class <pto_id>   (when the column starts with PT:)
    <ato_id> has xref <cmo_id>           (when the column starts with CMO:)

    :param raw: path of the csv file; its first line is a header
    :param limit: outside of test mode, stop once more than this many rows
        have been read; None means no limit
    :return: None
    :raises ValueError: if a row has more than eight columns
    """
    if self.testMode:
        g = self.testgraph
    else:
        g = self.graph
    line_counter = 0
    gu = GraphUtils(curie_map.get())

    with open(raw, 'r', newline='') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
        next(filereader, None)  # skip header line
        for row in filereader:
            line_counter += 1
            # need to skip the last line
            if len(row) < 8:
                logger.info("skipping line %d: %s", line_counter, '\t'.join(row))
                continue
            (vto_id, pto_id, cmo_id, ato_column, species,
             trait_class, trait_type, qtl_count) = row

            unbracketed = re.sub(r'\].*', '', re.sub(r'\[', '', ato_column))
            ato_id = re.sub('ATO #', 'AQTLTrait:', unbracketed)
            ato_label = re.sub(r'.*\]\s*', '', ato_column)
            gu.addClassToGraph(g, ato_id, ato_label.strip())

            if vto_id.startswith('VT:'):
                gu.addClassToGraph(g, vto_id, None)
                gu.addEquivalentClass(g, ato_id, vto_id)
            if pto_id.startswith('PT:'):
                gu.addClassToGraph(g, pto_id, None)
                gu.addEquivalentClass(g, ato_id, pto_id)
            if cmo_id.startswith('CMO:'):
                gu.addClassToGraph(g, cmo_id, None)
                gu.addXref(g, ato_id, cmo_id)

            # honour the documented limit instead of silently ignoring it
            if not self.testMode and \
                    limit is not None and line_counter > limit:
                break

    logger.info("Done with trait mappings")
    return
示例8: _process_QTLs_genetic_location
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addXref [as 别名]
def _process_QTLs_genetic_location(self, raw, taxon_id, common_name, limit=None):
"""
This function processes
Triples created:
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
line_counter = 0
geno = Genotype(g)
gu = GraphUtils(curie_map.get())
eco_id = "ECO:0000061" # Quantitative Trait Analysis Evidence
logger.info("Processing genetic location for %s", taxon_id)
with open(raw, 'r', encoding="iso-8859-1") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
(qtl_id, qtl_symbol, trait_name, assotype, empty, chromosome, position_cm, range_cm,
flankmark_a2, flankmark_a1, peak_mark, flankmark_b1, flankmark_b2, exp_id, model, test_base,
sig_level, lod_score, ls_mean, p_values, f_statistics, variance, bayes_value, likelihood_ratio,
trait_id, dom_effect, add_effect, pubmed_id, gene_id, gene_id_src, gene_id_type, empty2) = row
if self.testMode and int(qtl_id) not in self.test_ids:
continue
qtl_id = 'AQTL:'+qtl_id
trait_id = 'AQTLTrait:'+trait_id
# Add QTL to graph
f = Feature(qtl_id, qtl_symbol, geno.genoparts['QTL'])
f.addTaxonToFeature(g, taxon_id)
# deal with the chromosome
chrom_id = makeChromID(chromosome, taxon_id, 'CHR')
# add a version of the chromosome which is defined as the genetic map
build_id = 'MONARCH:'+common_name.strip()+'-linkage'
build_label = common_name+' genetic map'
geno.addReferenceGenome(build_id, build_label, taxon_id)
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
start = stop = None
if re.search('-', range_cm):
range_parts = re.split('-', range_cm)
# check for poorly formed ranges
if len(range_parts) == 2 and range_parts[0] != '' and range_parts[1] != '':
(start, stop) = [int(float(x.strip())) for x in re.split('-', range_cm)]
else:
logger.info("There's a cM range we can't handle for QTL %s: %s", qtl_id, range_cm)
elif position_cm != '':
start = stop = int(float(position_cm))
# FIXME remove converion to int for start/stop when schema can handle floats
# add in the genetic location based on the range
f.addFeatureStartLocation(start, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureEndLocation(stop, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureToGraph(g)
# sometimes there's a peak marker, like a rsid. we want to add that as a variant of the gene,
# and xref it to the qtl.
dbsnp_id = None
if peak_mark != '' and peak_mark != '.' and re.match('rs', peak_mark.strip()):
dbsnp_id = 'dbSNP:'+peak_mark.strip()
gu.addIndividualToGraph(g, dbsnp_id, None, geno.genoparts['sequence_alteration'])
gu.addXref(g, qtl_id, dbsnp_id)
if gene_id is not None and gene_id != '' and gene_id != '.':
if gene_id_src == 'NCBIgene' or gene_id_src == '': # we assume if no src is provided, it's NCBI
gene_id = 'NCBIGene:'+gene_id.strip()
geno.addGene(gene_id, None) # we will expect that these labels provided elsewhere
geno.addAlleleOfGene(qtl_id, gene_id, geno.object_properties['feature_to_gene_relation']) # FIXME what is the right relationship here?
if dbsnp_id is not None:
# add the rsid as a seq alt of the gene_id
vl_id = '_' + re.sub(':', '', gene_id) + '-' + peak_mark
if self.nobnodes:
vl_id = ':' + vl_id
geno.addSequenceAlterationToVariantLocus(dbsnp_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
# add the trait
gu.addClassToGraph(g, trait_id, trait_name)
# Add publication
r = None
if re.match('ISU.*', pubmed_id):
pub_id = 'AQTLPub:'+pubmed_id.strip()
r = Reference(pub_id)
elif pubmed_id != '':
pub_id = 'PMID:'+pubmed_id.strip()
r = Reference(pub_id, Reference.ref_types['journal_article'])
if r is not None:
#.........这里部分代码省略.........