本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addSameIndividual方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.addSameIndividual方法的具体用法?Python GraphUtils.addSameIndividual怎么用?Python GraphUtils.addSameIndividual使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.utils.GraphUtils.GraphUtils的用法示例。
在下文中一共展示了GraphUtils.addSameIndividual方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_straininfo
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addSameIndividual [as 别名]
def _process_straininfo(self, limit):
    """
    Read the strain-info CSV file and add every strain to the graph.

    Each data row becomes an individual ``MPD-strain:<mpd_strainid>``
    typed with ``NCBITaxon:10090`` and labelled with the strain name.
    A non-blank short name is added as a synonym, and the id -> name
    pair is stored in ``self.idlabel_hash``.  When a stock number is
    present, vendor ``J`` and vendor ``Rbrc`` records are linked with
    ``addSameIndividual``; for any other vendor the url and the
    ``vendor:stocknum`` pair are added as xrefs.  A non-empty panel is
    recorded as a description.

    In test mode the triples go to ``self.testgraph`` and only rows whose
    ``MPD:<mpd_strainid>`` is found in ``self.test_ids`` are processed.

    :param limit: currently unused; every row of the file is read
    :return: None
    :raises ValueError: if a non-empty row does not have exactly
        ten columns
    """
    # line_counter = 0  # TODO unused
    g = self.testgraph if self.testMode else self.graph

    logger.info("Processing measurements ...")
    raw = '/'.join((self.rawdir, self.files['straininfo']['file']))
    tax_id = 'NCBITaxon:10090'
    gu = GraphUtils(curie_map.get())

    # newline='' is what the csv module expects: it then does its own
    # line-ending handling, so newlines embedded in quoted fields survive.
    with open(raw, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='\"')
        # skip the header through the reader (not the raw file) so the
        # parser and the file position can never disagree; the default
        # keeps an empty file from raising StopIteration
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to load
                continue
            (strain_name, vendor, stocknum, panel, mpd_strainid,
             straintype, n_proj, n_snp_datasets, mpdshortname, url) = row
            # C57BL/6J,J,000664,,7,IN,225,17,,http://jaxmice.jax.org/strain/000664.html

            if self.testMode and \
                    'MPD:'+str(mpd_strainid) not in self.test_ids:
                continue

            # create the strain as an instance of the taxon
            strain_id = 'MPD-strain:'+str(mpd_strainid)
            gu.addIndividualToGraph(g, strain_id, strain_name, tax_id)
            short_name = mpdshortname.strip()
            if short_name != '':
                gu.addSynonym(g, strain_id, short_name)
            self.idlabel_hash[strain_id] = strain_name

            # make it equivalent to the vendor+stock
            if stocknum != '':
                if vendor == 'J':
                    jax_id = 'JAX:'+stocknum
                    gu.addSameIndividual(g, strain_id, jax_id)
                elif vendor == 'Rbrc':
                    # reiken; the stock number may already carry the
                    # 'RBRC' text, so drop it before adding the prefix
                    reiken_id = 'RBRC:'+re.sub(r'RBRC', '', stocknum)
                    gu.addSameIndividual(g, strain_id, reiken_id)
                else:
                    if url != '':
                        gu.addXref(g, strain_id, url, True)
                    if vendor != '':
                        gu.addXref(
                            g, strain_id, ':'.join((vendor, stocknum)),
                            True)

            # add the panel information
            if panel != '':
                desc = panel+' [panel]'
                gu.addDescription(g, strain_id, desc)

            # TODO make the panels as a resource collection

    return
示例2: process_pub_xrefs
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addSameIndividual [as 别名]
def process_pub_xrefs(self, limit=None):
    """
    Link WormBase paper ids to their PubMed or DOI identifiers.

    The input is a two-column, tab-delimited file (paper id, xref), e.g.::

        WBPaper00000009 pmid8805<BR>
        WBPaper00000011 doi10.1139/z78-244<BR>
        WBPaper00000012 cgc12<BR>

    For every ``pmid...`` or ``doi...`` xref a ``Reference`` is added to
    the graph and ``WormBase:<paper id>`` is declared the same individual
    as the ``PMID:``/``DOI:`` identifier.  Xrefs of any other kind
    (including ``cgc``), and non-pmid xrefs that contain brackets or
    whitespace, are skipped.

    In test mode the triples go to ``self.testgraph`` and only papers
    listed in ``self.test_ids['pub']`` are processed.

    :param limit: outside test mode, stop after a row has been added
        whose line number is greater than this value; ``None`` reads
        the whole file
    :return: None
    :raises ValueError: if a non-empty row does not have exactly
        two columns
    """
    raw = '/'.join((self.rawdir, self.files['pub_xrefs']['file']))
    g = self.testgraph if self.testMode else self.graph
    gu = GraphUtils(curie_map.get())

    logger.info("Processing publication xrefs")
    line_counter = 0
    # newline='' is what the csv module expects: it then does its own
    # line-ending handling.
    with open(raw, 'r', newline='') as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            if not row:
                # csv.reader yields [] for a blank line; nothing to load
                continue
            (wb_ref, xref) = row

            if self.testMode and wb_ref not in self.test_ids['pub']:
                continue

            ref_id = 'WormBase:'+wb_ref
            xref = re.sub(r'<BR>', '', xref).strip()

            if re.match(r'pmid', xref):
                # remove only the leading marker, never a later match
                xref_id = 'PMID:'+re.sub(r'^pmid\s*', '', xref)
                r = Reference(
                    xref_id, Reference.ref_types['journal_article'])
            elif re.search(r'[\(\)\<\>\[\]\s]', xref):
                # brackets or whitespace: not a clean identifier
                continue
            elif re.match(r'doi', xref):
                # A DOI is free-form text and may itself contain "doi",
                # so slice off the prefix instead of substituting every
                # occurrence.
                xref_id = 'DOI:'+xref[len('doi'):]
                r = Reference(xref_id)
            elif re.match(r'cgc', xref):
                # TODO not sure what to do here with cgc xrefs
                continue
            else:
                # logger.debug("Other xrefs like %s", xref)
                continue

            # only pmid/doi rows reach this point
            r.addRefToGraph(g)
            gu.addSameIndividual(g, ref_id, xref_id)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break

    return
示例3: _process_data
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addSameIndividual [as 别名]
#.........这里部分代码省略.........
# but we'll deal with it as-is for now.
short_desc = (description.split(';')[0]).capitalize()
if affected == 'Yes':
affected = 'affected'
elif affected == 'No':
affected = 'unaffected'
gender = gender.lower()
patient_label = ' '.join((affected, gender, relprob))
if relprob == 'proband':
patient_label = \
' '.join(
(patient_label.strip(), 'with', short_desc))
else:
patient_label = \
' '.join(
(patient_label.strip(), 'of proband with',
short_desc))
# ############# BUILD THE CELL LINE #############
# Adding the cell line as a typed individual.
cell_line_reagent_id = 'CLO:0000031'
gu.addIndividualToGraph(
g, cell_line_id, line_label, cell_line_reagent_id)
# add the equivalent id == dna_ref
if dna_ref != '' and dna_ref != catalog_id:
equiv_cell_line = 'Coriell:'+dna_ref
# some of the equivalent ids are not defined
# in the source data; so add them
gu.addIndividualToGraph(
g, equiv_cell_line, None, cell_line_reagent_id)
gu.addSameIndividual(g, cell_line_id, equiv_cell_line)
# Cell line derives from patient
geno.addDerivesFrom(cell_line_id, patient_id)
geno.addDerivesFrom(cell_line_id, cell_type)
# Cell line a member of repository
gu.addMember(g, repository, cell_line_id)
if cat_remark != '':
gu.addDescription(g, cell_line_id, cat_remark)
# Cell age_at_sampling
# TODO add the age nodes when modeled properly in #78
# if (age != ''):
# this would give a BNode that is an instance of Age.
# but i don't know how to connect
# the age node to the cell line? we need to ask @mbrush
# age_id = '_'+re.sub('\s+','_',age)
# gu.addIndividualToGraph(
# g,age_id,age,self.terms['age'])
# gu.addTriple(
# g,age_id,self.properties['has_measurement'],age,
# True)
# ############# BUILD THE PATIENT #############
# Add the patient ID as an individual.
gu.addPerson(g, patient_id, patient_label)
# TODO map relationship to proband as a class
# (what ontology?)
# Add race of patient
示例4: OMIA
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addSameIndividual [as 别名]
#.........这里部分代码省略.........
writer.writerow((str(phene),
self.stored_omia_mol_gen[phene]['mol_gen'],
self.stored_omia_mol_gen[phene]['map_info'],
self.stored_omia_mol_gen[phene]['species']))
logger.info(
"Wrote %d potential G2P descriptions for curation to %s",
len(self.stored_omia_mol_gen), f)
return
def _process_article_row(self, row):
# don't bother in test mode
if self.testMode:
return
iarticle_id = self._make_internal_id('article', row['article_id'])
self.id_hash['article'][row['article_id']] = iarticle_id
rtype = None
if row['journal'] != '':
rtype = Reference.ref_types['journal_article']
r = Reference(iarticle_id, rtype)
if row['title'] is not None:
r.setTitle(row['title'].strip())
if row['year'] is not None:
r.setYear(row['year'])
r.addRefToGraph(self.g)
if row['pubmed_id'] is not None:
pmid = 'PMID:'+str(row['pubmed_id'])
self.id_hash['article'][row['article_id']] = pmid
self.gu.addSameIndividual(self.g, iarticle_id, pmid)
self.gu.addComment(self.g, pmid, iarticle_id)
return
def _process_omia_group_row(self, row):
omia_id = 'OMIA:'+row['omia_id']
if self.testMode and omia_id not in self.test_ids['disease']:
return
group_name = row['group_name']
group_summary = row['group_summary']
disease_id = None
group_category = row.get('group_category')
disease_id = \
self.map_omia_group_category_to_ontology_id(group_category)
if disease_id is not None:
self.gu.addClassToGraph(self.g, disease_id, None)
if disease_id == 'MP:0008762': # embryonic lethal
# add this as a phenotype association
# add embryonic onset
assoc = D2PAssoc(self.name, omia_id, disease_id)
assoc.add_association_to_graph(self.g)
disease_id = None
else:
logger.info(
"No disease superclass defined for %s: %s",
omia_id, group_name)
# default to general disease FIXME this may not be desired
disease_id = 'DOID:4'
示例5: _get_gene_info
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addSameIndividual [as 别名]
#.........这里部分代码省略.........
if name != '-':
gu.addSynonym(g, gene_id, name)
if synonyms.strip() != '-':
for s in synonyms.split('|'):
gu.addSynonym(
g, gene_id, s.strip(),
Assoc.annotation_properties['hasRelatedSynonym'])
if other_designations.strip() != '-':
for s in other_designations.split('|'):
gu.addSynonym(
g, gene_id, s.strip(),
Assoc.annotation_properties['hasRelatedSynonym'])
# deal with the xrefs
# MIM:614444|HGNC:HGNC:16851|Ensembl:ENSG00000136828|HPRD:11479|Vega:OTTHUMG00000020696
if xrefs.strip() != '-':
for r in xrefs.strip().split('|'):
fixedr = self._cleanup_id(r)
if fixedr is not None and fixedr.strip() != '':
if re.match(r'HPRD', fixedr):
# proteins are not == genes.
gu.addTriple(
g, gene_id,
self.properties[
'has_gene_product'], fixedr)
else:
# skip some of these for now
if fixedr.split(':')[0] not in [
'Vega', 'IMGT/GENE-DB']:
if self.class_or_indiv.get(gene_id) == 'C':
gu.addEquivalentClass(
g, gene_id, fixedr)
else:
gu.addSameIndividual(
g, gene_id, fixedr)
# edge cases of id | symbol | chr | map_loc:
# 263 AMD1P2 X|Y with Xq28 and Yq12
# 438 ASMT X|Y with Xp22.3 or Yp11.3 # in PAR
# no idea why there's two bands listed - possibly 2 assemblies
# 419 ART3 4 with 4q21.1|4p15.1-p14
# 28227 PPP2R3B X|Y Xp22.33; Yp11.3 # in PAR
# this is of "unknown" type == susceptibility
# 619538 OMS 10|19|3 10q26.3;19q13.42-q13.43;3p25.3
# unlocated scaffold
# 101928066 LOC101928066 1|Un -\
# mouse --> 2C3
# 11435 Chrna1 2 2 C3|2 43.76 cM
# mouse --> 11B1.1
# 11548 Adra1b 11 11 B1.1|11 25.81 cM
# 11717 Ampd3 7 7 57.85 cM|7 E2-E3 # mouse
# 14421 B4galnt1 10 10 D3|10 74.5 cM # mouse
# 323212 wu:fb92e12 19|20 - # fish
# 323368 ints10 6|18 - # fish
# 323666 wu:fc06e02 11|23 - # fish
# feel that the chr placement can't be trusted in this table
# when there is > 1 listed
# with the exception of human X|Y,
# we will only take those that align to one chr
# FIXME remove the chr mapping below
# when we pull in the genomic coords
if str(chrom) != '-' and str(chrom) != '':
if re.search(r'\|', str(chrom)) and \
str(chrom) not in ['X|Y', 'X; Y']:
示例6: UCSCBands
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addSameIndividual [as 别名]
#.........这里部分代码省略.........
parent_chrom_in_build = makeChromID(myband['parent'],
build_num, 'MONARCH')
bfeature.addSubsequenceOfFeature(self.graph,
parent_chrom_in_build)
# add the band as a feature
# (which also instantiates the owl:Individual)
bfeature.addFeatureStartLocation(myband['min'], chrom_in_build_id)
bfeature.addFeatureEndLocation(myband['max'], chrom_in_build_id)
if 'stain' in myband and myband['stain'] is not None:
# TODO TEC I recall 'has_staining_intensity' being dropped by MB
bfeature.addFeatureProperty(self.graph,
Feature.properties['has_staining_intensity'],
myband['stain'])
# type the band as a faldo:Region directly (add_region=False)
# bfeature.setNoBNodes(self.nobnodes)
# to come when we merge in ZFIN.py
bfeature.addFeatureToGraph(self.graph, False)
return
def _create_genome_builds(self):
    """
    Declare each UCSC genome build equivalent to its NCBI assembly.

    Various resources map variations either to UCSC (hg*) builds or to
    NCBI assemblies.  For every pair in the table below, both
    identifiers are added as reference genomes of the taxon and then
    linked with ``addSameIndividual``.
    Data taken from:
    https://genome.ucsc.edu/FAQ/FAQreleases.html#release1

    :return: None
    """
    # TODO add more species
    # taxon number -> {UCSC build id: NCBI assembly id}
    builds_by_taxon = {
        "9606": {
            "UCSC:hg38": "NCBIGenome:GRCh38",
            "UCSC:hg19": "NCBIGenome:GRCh37",
            "UCSC:hg18": "NCBIGenome:36.1",
            "UCSC:hg17": "NCBIGenome:35",
            "UCSC:hg16": "NCBIGenome:34",
            "UCSC:hg15": "NCBIGenome:33",
        },
        "7955": {
            "UCSC:danRer10": "NCBIGenome:GRCz10",
            "UCSC:danRer7": "NCBIGenome:Zv9",
            "UCSC:danRer6": "NCBIGenome:Zv8",
        },
        "10090": {
            "UCSC:mm10": "NCBIGenome:GRCm38",
            "UCSC:mm9": "NCBIGenome:37",
        },
        "9031": {
            "UCSC:galGal4": "NCBIAssembly:317958",
        },
        "9913": {
            "UCSC:bosTau7": "NCBIAssembly:GCF_000003205.5",
        },
        "9823": {
            "UCSC:susScr3": "NCBIAssembly:304498",
        },
        "9940": {
            "UCSC:oviAri3": "NCBIAssembly:GCF_000298735.1",
        },
        "9796": {
            "UCSC:equCab2": "NCBIAssembly:GCF_000002305.2",
        },
    }

    graph = self.graph
    genotype = Genotype(graph)
    logger.info("Adding equivalent assembly identifiers")

    for tax_num, build_pairs in builds_by_taxon.items():
        taxon_curie = 'NCBITaxon:' + tax_num
        for ucsc_id, ncbi_id in build_pairs.items():
            # the label is the local part of the curie (after the colon)
            ucsc_label = ucsc_id.split(':')[1]
            ncbi_label = 'NCBI build ' + str(ncbi_id.split(':')[1])

            genotype.addReferenceGenome(ucsc_id, ucsc_label, taxon_curie)
            genotype.addReferenceGenome(ncbi_id, ncbi_label, taxon_curie)
            self.gu.addSameIndividual(graph, ucsc_id, ncbi_id)

    return
def _check_tax_ids(self):
for taxon in self.tax_ids:
if str(taxon) not in self.files:
raise Exception("Taxon " + str(taxon) + " not supported"
" by source UCSCBands")
def getTestSuite(self):
    """
    Build the unittest suite for this source.

    :return: the suite loaded from ``UCSCBandsTestCase``
    """
    # imports stay inside the method, so the test modules are only
    # loaded when a suite is actually requested
    from unittest import TestLoader
    from tests.test_ucscbands import UCSCBandsTestCase

    return TestLoader().loadTestsFromTestCase(UCSCBandsTestCase)
示例7: _get_variants
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addSameIndividual [as 别名]
#.........这里部分代码省略.........
# probably will get taken care of when we switch to processing the xml
# for example, variant_num = 38562
# but there's no way to tell if it's a haplotype in the csv data
# so the dbsnp or dbvar should probably be primary, and the variant num be the vslc,
# with each of the dbsnps being added to it
# todo clinical significance needs to be mapped to a list of terms
# first, make the variant:
f = Feature(seqalt_id, allele_name, allele_type_id)
if start != '-' and start.strip() != '':
f.addFeatureStartLocation(start, chrinbuild_id)
if stop != '-' and stop.strip() != '':
f.addFeatureEndLocation(stop, chrinbuild_id)
f.addFeatureToGraph(g)
if bandinbuild_id is not None:
f.addSubsequenceOfFeature(g, bandinbuild_id)
# CHECK - this makes the assumption that there is only one affected chromosome per variant
# what happens with chromosomal rearrangement variants? shouldn't both chromosomes be here?
# add the hgvs as synonyms
if hgvs_c != '-' and hgvs_c.strip() != '':
gu.addSynonym(g, seqalt_id, hgvs_c)
if hgvs_p != '-' and hgvs_p.strip() != '':
gu.addSynonym(g, seqalt_id, hgvs_p)
# add the dbsnp and dbvar ids as equivalent
if dbsnp_num != '-' and int(dbsnp_num) != -1:
dbsnp_id = 'dbSNP:rs'+str(dbsnp_num)
gu.addIndividualToGraph(g, dbsnp_id, None)
gu.addSameIndividual(g, seqalt_id, dbsnp_id)
if dbvar_num != '-':
dbvar_id = 'dbVar:'+dbvar_num
gu.addIndividualToGraph(g, dbvar_id, None)
gu.addSameIndividual(g, seqalt_id, dbvar_id)
# TODO - not sure if this is right... add as xref?
# the rcv is like the combo of the phenotype with the variant
if rcv_nums != '-':
for rcv_num in re.split(';',rcv_nums):
rcv_id = 'ClinVar:'+rcv_num
gu.addIndividualToGraph(g, rcv_id, None)
gu.addXref(g, seqalt_id, rcv_id)
if gene_id is not None:
# add the gene
gu.addClassToGraph(g, gene_id, gene_symbol)
# make a variant locus
vl_id = '_'+gene_num+'-'+variant_num
if self.nobnodes:
vl_id = ':'+vl_id
vl_label = allele_name
gu.addIndividualToGraph(g, vl_id, vl_label, geno.genoparts['variant_locus'])
geno.addSequenceAlterationToVariantLocus(seqalt_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
else:
# some basic reporting
gmatch = re.search('\(\w+\)', allele_name)
if gmatch is not None and len(gmatch.groups()) > 0:
logger.info("Gene found in allele label, but no id provided: %s", gmatch.group(1))
elif re.match('more than 10', gene_symbol):
logger.info("More than 10 genes found; need to process XML to fetch (variant=%d)", int(variant_num))
else: