本文整理汇总了Python中dipper.models.Model.Model.addIndividualToGraph方法的典型用法代码示例。如果您正苦于以下问题:Python Model.addIndividualToGraph方法的具体用法?Python Model.addIndividualToGraph怎么用?Python Model.addIndividualToGraph使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Model.Model的用法示例。
在下文中一共展示了Model.addIndividualToGraph方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_collection
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _process_collection(self, collection_id, label, page):
    """
    Record the Coriell repository (collection) in both graphs.

    The same statements are written to ``self.graph`` and to
    ``self.testgraph``.

    Triples:
        Repository a ERO:collection
        rdf:label Literal(label)
        foaf:page Literal(page)

    :param collection_id: local id, appended to the
        ``CoriellCollection:`` prefix to form the repository curie
    :param label: label given to the repository individual
    :param page: value handed to ``Reference.addPage`` for the repository
    :return: None
    """
    # ############# BUILD THE CELL LINE REPOSITORY #############
    for target_graph in (self.graph, self.testgraph):
        # TODO: How to devise a label for each repository?
        model = Model(target_graph)
        reference = Reference(target_graph)
        collection_curie = 'CoriellCollection:' + collection_id
        model.addIndividualToGraph(
            collection_curie, label, self.globaltt['collection'])
        reference.addPage(collection_curie, page)
示例2: _add_assertion_provenance
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _add_assertion_provenance(self, assoc_id, evidence_line_bnode):
    """
    Add assertion level provenance, currently always IMPC.

    An assertion node is made from the association id plus the local
    IMPC term, typed as an assertion, attributed to IMPC, and then
    linked to both the association and the evidence line.

    :param assoc_id: identifier of the association being asserted
    :param evidence_line_bnode: node of the evidence line that supports
        the assertion
    :return: None
    """
    provenance_model = Provenance(self.graph)
    model = Model(self.graph)

    assertion_key = "assertion{0}{1}".format(assoc_id, self.localtt['IMPC'])
    assertion_bnode = self.make_id(assertion_key, '_')

    model.addIndividualToGraph(
        assertion_bnode, None, self.globaltt['assertion'])
    provenance_model.add_assertion(
        assertion_bnode,
        self.localtt['IMPC'],
        'International Mouse Phenotyping Consortium')

    # association --proposition_asserted_in--> assertion
    self.graph.addTriple(
        assoc_id, self.globaltt['proposition_asserted_in'], assertion_bnode)

    # assertion --is_assertion_supported_by_evidence--> evidence line
    supported_by = self.resolve(
        'is_assertion_supported_by_evidence')  # "SEPIO:0000111"
    self.graph.addTriple(assertion_bnode, supported_by, evidence_line_bnode)
示例3: _add_assertion_provenance
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _add_assertion_provenance(
        self, assoc_id, evidence_line_bnode, impc_map):
    """
    Add assertion level provenance, currently always IMPC.

    :param assoc_id: identifier of the association being asserted
    :param evidence_line_bnode: node of the evidence line that supports
        the assertion
    :param impc_map: nested mapping; ``impc_map['asserted_by']['IMPC']``
        is read as the identifier of the asserting agent
    :return: None
    """
    provenance_model = Provenance(self.graph)
    model = Model(self.graph)

    assertion_key = "assertion{0}{1}".format(
        assoc_id, impc_map['asserted_by']['IMPC'])
    assertion_bnode = self.make_id(assertion_key, '_')

    model.addIndividualToGraph(
        assertion_bnode, None,
        provenance_model.provenance_types['assertion'])
    provenance_model.add_assertion(
        assertion_bnode,
        impc_map['asserted_by']['IMPC'],
        'International Mouse Phenotyping Consortium')

    # association --is_asserted_in--> assertion
    self.graph.addTriple(
        assoc_id,
        provenance_model.object_properties['is_asserted_in'],
        assertion_bnode)

    # assertion --is_assertion_supported_by--> evidence line
    self.graph.addTriple(
        assertion_bnode,
        provenance_model.object_properties['is_assertion_supported_by'],
        evidence_line_bnode)
示例4: _process_straininfo
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _process_straininfo(self, limit):
    """
    Read the strain-info CSV and add every strain to the graph.

    Each row becomes an individual typed with the mouse taxon
    (``NCBITaxon:10090``). Depending on the row it also gets a synonym,
    a same-as link to the vendor stock id (JAX or RBRC), xrefs for other
    vendors, and a panel description. In test mode only strains whose
    ``MPD:<id>`` is in ``self.test_ids`` are kept.

    :param limit: accepted but not used by this method
    :return: None
    """
    # line_counter = 0  # TODO unused
    g = self.testgraph if self.testMode else self.graph
    model = Model(g)
    logger.info("Processing measurements ...")
    raw = '/'.join((self.rawdir, self.files['straininfo']['file']))
    tax_id = 'NCBITaxon:10090'

    with open(raw, 'r') as f:
        reader = csv.reader(f, delimiter=',', quotechar='\"')
        # the first line is the header; validate it before reading rows
        self.check_header(self.files['straininfo']['file'], f.readline())
        for row in reader:
            (strain_name, vendor, stocknum, panel, mpd_strainid,
             straintype, n_proj, n_snp_datasets, mpdshortname, url) = row
            # C57BL/6J,J,000664,,7,IN,225,17,,http://jaxmice.jax.org/strain/000664.html

            if self.testMode and \
                    'MPD:' + str(mpd_strainid) not in self.test_ids:
                continue

            # create the strain as an instance of the taxon
            strain_id = 'MPD-strain:' + str(mpd_strainid)
            model.addIndividualToGraph(strain_id, strain_name, tax_id)

            short_name = mpdshortname.strip()
            if short_name != '':
                model.addSynonym(strain_id, short_name)
            self.idlabel_hash[strain_id] = strain_name

            # make it equivalent to the vendor+stock
            if stocknum != '':
                if vendor == 'J':
                    model.addSameIndividual(strain_id, 'JAX:' + stocknum)
                elif vendor == 'Rbrc':
                    # reiken
                    model.addSameIndividual(
                        strain_id, 'RBRC:' + stocknum.replace('RBRC', ''))
                else:
                    if url != '':
                        model.addXref(strain_id, url, True)
                    if vendor != '':
                        vendor_stock = ':'.join((vendor, stocknum))
                        model.addXref(strain_id, vendor_stock, True)

            # add the panel information
            if panel != '':
                model.addDescription(strain_id, panel + ' [panel]')

            # TODO make the panels as a resource collection
示例5: Environment
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
class Environment:
    """
    Convenience methods for adding items related to an experimental
    environment and its parts to a supplied graph.

    This is a stub.
    """

    def __init__(self, graph):
        """
        :param graph: the graph to write to; must be a ``Graph`` instance
        :raises ValueError: when ``graph`` is not a ``Graph``
        """
        if not isinstance(graph, Graph):
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)
        # translation tables and curie map are borrowed from the graph
        self.globaltt = self.graph.globaltt
        self.globaltcid = self.graph.globaltcid
        self.curie_map = self.graph.curie_map

    def addEnvironment(
            self, env_id, env_label, env_type=None, env_description=None):
        """
        Add an environment individual to the graph.

        :param env_id: identifier of the environment
        :param env_label: label of the environment
        :param env_type: type of the individual; defaults to the global
            term ``environmental_system``
        :param env_description: optional description
        :return: None
        """
        chosen_type = (
            self.globaltt['environmental_system']
            if env_type is None else env_type)
        self.model.addIndividualToGraph(
            env_id, env_label, chosen_type, env_description)

    def addEnvironmentalCondition(
            self, cond_id, cond_label, cond_type=None, cond_description=None):
        """
        Add an environmental-condition individual to the graph.

        :param cond_id: identifier of the condition
        :param cond_label: label of the condition
        :param cond_type: type of the individual; defaults to the global
            term ``environmental_condition``
        :param cond_description: optional description
        :return: None
        """
        chosen_type = (
            self.globaltt['environmental_condition']
            if cond_type is None else cond_type)
        self.model.addIndividualToGraph(
            cond_id, cond_label, chosen_type, cond_description)

    def addComponentToEnvironment(self, env_id, component_id):
        """
        State that ``component_id`` is a part of ``env_id`` (``has_part``).

        :return: None
        """
        has_part = self.globaltt['has_part']
        self.graph.addTriple(env_id, has_part, component_id)

    def addComponentAttributes(self, component_id, entity_id, value=None, unit=None):
        """
        State that ``entity_id`` is a part of ``component_id`` (``has_part``).

        :param value: accepted but not yet used
        :param unit: accepted but not yet used
        :return: None
        """
        has_part = self.globaltt['has_part']
        self.graph.addTriple(component_id, has_part, entity_id)
        # TODO add value and units
示例6: _make_pheno_assoc
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _make_pheno_assoc(self, g, gene_id, gene_symbol, disorder_num,
                      disorder_label, phene_key):
    """
    Add a gene-to-disorder association to graph ``g``.

    The relationship is chosen from the first character of the disorder
    label: ``[`` means "is a marker for", ``{`` and ``?`` mean
    "contributes to", anything else means "causes" (has_phenotype).

    When ``gene_id`` is an OMIM id, the association is made through an
    anonymous variant locus of that gene rather than the gene itself.

    :param g: graph to write to
    :param gene_id: curie of the gene
    :param gene_symbol: gene symbol used to label the anonymous variant
        locus; ``None`` or blank leaves the locus unlabeled
    :param disorder_num: OMIM number of the disorder (string)
    :param disorder_label: label of the disorder
    :param phene_key: phene mapping code, translated to an evidence code
        by ``self._map_phene_mapping_code_to_eco``
    :return: None
    """
    geno = Genotype(g)
    model = Model(g)

    disorder_id = ':'.join(('OMIM', disorder_num))
    rel_id = model.object_properties['has_phenotype']  # default
    rel_label = 'causes'
    if re.match(r'\[', disorder_label):
        rel_id = model.object_properties['is_marker_for']
        rel_label = 'is a marker for'
    elif re.match(r'\{', disorder_label):
        rel_id = model.object_properties['contributes_to']
        rel_label = 'contributes to'
    elif re.match(r'\?', disorder_label):
        # this is a questionable mapping!  skip?
        rel_id = model.object_properties['contributes_to']
        rel_label = 'contributes to'

    evidence = self._map_phene_mapping_code_to_eco(phene_key)

    # we actually want the association between the gene and the disease
    # to be via an alternate locus not the "wildtype" gene itself.
    # so we make an anonymous alternate locus,
    # and put that in the association.
    # but we only need to do that in the cases when it's not an NCBIGene
    # (as that is a sequence feature itself)
    if re.match(r'OMIM:', gene_id):
        alt_locus = '_:' + re.sub(r':', '', gene_id) + '-' + disorder_num + 'VL'
        # Guard before stripping: calling .strip() on a missing symbol
        # would raise AttributeError before the None check could run.
        symbol = gene_symbol.strip() if gene_symbol is not None else ''
        if symbol != '':
            alt_label = ' '.join((
                'some variant of', symbol, 'that', rel_label, disorder_label))
        else:
            alt_label = None

        model.addIndividualToGraph(
            alt_locus, alt_label, Genotype.genoparts['variant_locus'])
        geno.addAffectedLocus(alt_locus, gene_id)
        model.addBlankNodeAnnotation(alt_locus)
    else:
        # assume it's already been added
        alt_locus = gene_id

    assoc = G2PAssoc(g, self.name, alt_locus, disorder_id, rel_id)
    assoc.add_evidence(evidence)
    assoc.add_association_to_graph()
示例7: _add_gene_to_graph
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _add_gene_to_graph(self, gene, variant_bnode, gene_id, relation):
    """
    Link a variant node to its gene.

    A truthy ``gene_id`` is used directly as the object of the triple.
    Otherwise, if ``gene`` is truthy, a blank node is made for it,
    labeled with ``gene``, and linked instead. If both are falsy nothing
    is added.

    :param gene: gene name used when no ``gene_id`` is available
    :param variant_bnode: subject node of the triple
    :param gene_id: identifier of the gene, possibly empty or None
    :param relation: predicate linking the variant to the gene
    :return: None
    """
    model = Model(self.graph)

    if gene_id:
        self.graph.addTriple(variant_bnode, relation, gene_id)
        return

    if not gene:
        return

    LOG.info("gene %s not mapped to NCBI gene, making blank node", gene)
    gene_bnode = self.make_id("{0}".format(gene), "_")
    model.addIndividualToGraph(gene_bnode, gene)
    self.graph.addTriple(variant_bnode, relation, gene_bnode)
示例8: _add_gene_anatomy_association
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _add_gene_anatomy_association(self, gene_id, anatomy_curie, rank):
    """
    Add a gene "expressed in" anatomy association, with its rank
    attached as an ``xsd:float`` quantifier.

    :param gene_id: str Non curified ID (prefixed with ``ENSEMBL:`` here)
    :param anatomy_curie: str curified anatomy term
    :param rank: rank; commas are removed before conversion to float
    :return: None
    """
    g2a_association = Assoc(self.graph, self.name)
    model = Model(self.graph)

    gene_curie = "ENSEMBL:{}".format(gene_id)
    # go through str() first: the rank may not arrive as a string
    rank = str(rank).replace(',', '')

    model.addIndividualToGraph(gene_curie, None)

    g2a_association.sub = gene_curie
    g2a_association.obj = anatomy_curie
    g2a_association.rel = self.globaltt['expressed in']
    g2a_association.add_association_to_graph()

    g2a_association.add_predicate_object(
        self.globaltt['has_quantifier'], float(rank), 'Literal', 'xsd:float')
示例9: _add_gene_anatomy_association
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _add_gene_anatomy_association(self, gene_id, anatomy_curie, rank):
    """
    Add a gene "expressed in" anatomy association, with its rank
    attached as an ``xsd:float`` quantifier.

    The gene is added to the graph as an individual typed as a gene.

    :param gene_id: str Non curified ID (prefixed with ``ENSEMBL:`` here)
    :param anatomy_curie: str curified anatomy term
    :param rank: rank as a string or a number; commas are removed
        before conversion to float
    :return: None
    """
    g2a_association = Assoc(self.graph, self.name)
    genotype = Genotype(self.graph)
    model = Model(self.graph)
    gene_curie = "ENSEMBL:{}".format(gene_id)
    # Coerce to str first: re.sub raises TypeError when the rank
    # arrives as a float or int instead of a string.
    rank = re.sub(r',', '', str(rank))
    model.addIndividualToGraph(
        ind_id=gene_curie, label=None,
        ind_type=genotype.genoparts['gene'])
    g2a_association.sub = gene_curie
    g2a_association.obj = anatomy_curie
    g2a_association.rel = Assoc.object_properties['expressed_in']
    g2a_association.add_association_to_graph()
    g2a_association.add_predicate_object(
        Assoc.datatype_properties['has_quantifier'],
        float(rank), 'Literal', 'xsd:float')
示例10: __init__
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
self.ref_url = None
self.title = None
self.year = None
self.author_list = None
self.short_citation = None
self.model = Model(self.graph)
self.globaltt = self.graph.globaltt
self.globaltcid = self.graph.globaltcid
self.curie_map = self.graph.curie_map
if ref_type is None:
self.ref_type = self.globaltt['document']
else:
self.ref_type = ref_type
if ref_type[:4] not in ('IAO:', 'SIO:'):
LOG.warning("Got Pub ref type of: %s", ref_type)
if ref_id is not None and ref_id[:4] == 'http':
self.ref_url = ref_id
return
def setTitle(self, title):
    """
    Remember the title of this reference.

    Only the ``title`` attribute is assigned here; nothing is written
    to the graph.

    :param title: value stored as ``self.title``
    :return: None
    """
    self.title = title
def setYear(self, year):
    """
    Remember the year of this reference.

    Only the ``year`` attribute is assigned here; nothing is written
    to the graph.

    :param year: value stored as ``self.year``
    :return: None
    """
    self.year = year
def setType(self, reference_type):
    """
    Replace the type of this reference.

    :param reference_type: value stored as ``self.ref_type``
    :return: None
    """
    self.ref_type = reference_type
def setAuthorList(self, author_list):
    """
    Replace the whole author list of this reference.

    The given object is stored as-is (no copy is made).

    :param author_list: Array of authors
    :return: None
    """
    self.author_list = author_list
def addAuthor(self, author):
    """
    Append one author to this reference's author list.

    ``author_list`` may still be ``None`` when no list has been set yet.
    In that case a new list is started, rather than failing with
    ``TypeError`` on ``None += [...]``.

    :param author: the author to add
    :return: None
    """
    if self.author_list is None:
        self.author_list = [author]
    else:
        self.author_list += [author]
def setShortCitation(self, citation):
    """
    Remember the short citation of this reference.

    :param citation: value stored as ``self.short_citation``
    :return: None
    """
    self.short_citation = citation
def addPage(self, subject_id, page_url):
    """
    Attach a page to a subject, with the url stored as a literal.

    :param subject_id: subject of the triple
    :param page_url: url of the page, written as a literal object
    :return: None
    """
    page_predicate = self.globaltt['page']  # foaf:page not <sio:web page>
    self.graph.addTriple(
        subject_id, page_predicate, page_url, object_is_literal=True)
def addTitle(self, subject_id, title):
    """
    Attach a title to a subject, stored as a literal.

    Nothing is added when ``title`` is ``None`` or the empty string.

    :param subject_id: subject of the triple
    :param title: the title text
    :return: None
    """
    if title is None or title == '':
        return
    title_predicate = self.globaltt['title (dce)']
    self.graph.addTriple(
        subject_id, title_predicate, title, object_is_literal=True)
def addRefToGraph(self):
    """
    Write this reference to the graph.

    The label is the short citation, or the title when there is no
    short citation. A reference with a url is typed and labeled under
    that url. Otherwise it is added as an individual under ``ref_id``.
    The title, when present, is attached in both cases. If neither
    identifier is set an error is logged and nothing is added.

    :return: None
    """
    label = (
        self.short_citation
        if self.short_citation is not None else self.title)
    has_title = self.title is not None

    if self.ref_url is not None:
        if has_title:
            self.addTitle(self.ref_url, self.title)
        self.model.addType(self.ref_url, self.ref_type)
        if label is not None:
            self.model.addLabel(self.ref_url, label)
    elif self.ref_id is not None:
        self.model.addIndividualToGraph(self.ref_id, label, self.ref_type)
        if has_title:
            self.addTitle(self.ref_id, self.title)
    else:
        # should never be true
        LOG.error("You are missing an identifier for a reference.")

    # TODO what is the property here to add the date?
    # TODO year and author list are not yet written to the graph
示例11: _get_process_allelic_variants
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _get_process_allelic_variants(self, entry, g):
    """
    Add the allelic variants listed in an OMIM entry to graph ``g``.

    For each variant with status ``live``: the allele is added as a
    variant locus of the entry's gene, publications cited in its text
    (``{<number>:`` markers) are linked with ``is_about``, dbSNP ids
    become same-as individuals, ClinVar accessions become xrefs, and
    the OMIM page of the variant is attached. Variants whose status
    contains ``moved`` (moved or removed) are deprecated, optionally
    pointing at their replacement. Any other status is logged.

    :param entry: mapping for one OMIM entry, read for ``mimNumber``
        and ``allelicVariantList``; ``None`` is a no-op
    :param g: graph to write to
    :return: None
    """
    model = Model(g)
    reference = Reference(g)
    geno = Genotype(g)
    if entry is not None:
        # to hold the entry-specific publication mentions
        # for the allelic variants
        publist = {}
        entry_num = entry['mimNumber']

        # process the ref list just to get the pmids
        ref_to_pmid = self._get_pubs(entry, g)

        if 'allelicVariantList' in entry:
            for al in entry['allelicVariantList']:
                variant = al['allelicVariant']
                al_num = variant['number']
                al_id = 'OMIM:' + str(entry_num) + '.' + str(al_num).zfill(4)
                al_label = None
                al_description = None
                if variant['status'] == 'live':
                    publist[al_id] = set()
                    if 'mutations' in variant:
                        al_label = variant['mutations']
                    if 'text' in variant:
                        al_description = variant['text']
                        # reference numbers cited in the text as "{12:..."
                        m = re.findall(r'\{(\d+)\:', al_description)
                        publist[al_id] = set(m)
                    geno.addAllele(
                        al_id, al_label, geno.genoparts['variant_locus'],
                        al_description)
                    geno.addAlleleOfGene(
                        al_id, 'OMIM:' + str(entry_num),
                        geno.object_properties[
                            'is_sequence_variant_instance_of'])
                    # look up the pubmed id in the list of references
                    for r in publist[al_id]:
                        pmid = ref_to_pmid[int(r)]
                        g.addTriple(
                            pmid, model.object_properties['is_about'],
                            al_id)
                    if 'dbSnps' in variant:
                        for dnum in re.split(r',', variant['dbSnps']):
                            did = 'dbSNP:' + dnum.strip()
                            model.addIndividualToGraph(did, None)
                            model.addSameIndividual(al_id, did)
                    if 'clinvarAccessions' in variant:
                        # clinvarAccessions triple semicolon delimited
                        # each >1 like RCV000020059;;;
                        for fragment in re.split(
                                r';;;', variant['clinvarAccessions']):
                            rcv = re.match(r'(RCV\d+);*', fragment)
                            if rcv is None:
                                # e.g. the empty piece left behind by a
                                # trailing ';;;' -- calling .group() on
                                # it would raise AttributeError
                                continue
                            model.addXref(al_id, 'ClinVar:' + rcv.group(1))
                    reference.addPage(
                        al_id, "http://omim.org/entry/" +
                        str(entry_num) + "#" + str(al_num).zfill(4))
                elif re.search(r'moved', variant['status']):
                    # for both 'moved' and 'removed'
                    moved_ids = None
                    if 'movedTo' in variant:
                        moved_ids = ['OMIM:' + variant['movedTo']]
                    model.addDeprecatedIndividual(al_id, moved_ids)
                else:
                    logger.error('Uncaught alleleic variant status %s',
                                 variant['status'])
            # end loop allelicVariantList
示例12: _process_qtls_genomic_location
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
def _process_qtls_genomic_location(
self, raw, txid, build_id, build_label, common_name, limit=None):
"""
This method
Triples created:
:param limit:
:return:
"""
if self.test_mode:
graph = self.testgraph
else:
graph = self.graph
model = Model(graph)
line_counter = 0
geno = Genotype(graph)
# assume that chrs get added to the genome elsewhere
taxon_curie = 'NCBITaxon:' + txid
eco_id = self.globaltt['quantitative trait analysis evidence']
LOG.info("Processing QTL locations for %s from %s", taxon_curie, raw)
with gzip.open(raw, 'rt', encoding='ISO-8859-1') as tsvfile:
reader = csv.reader(tsvfile, delimiter="\t")
for row in reader:
line_counter += 1
if re.match(r'^#', ' '.join(row)):
continue
(chromosome, qtl_source, qtl_type, start_bp, stop_bp, frame, strand,
score, attr) = row
example = '''
Chr.Z Animal QTLdb Production_QTL 33954873 34023581...
QTL_ID=2242;Name="Spleen percentage";Abbrev="SPLP";PUBMED_ID=17012160;trait_ID=2234;
trait="Spleen percentage";breed="leghorn";"FlankMarkers=ADL0022";VTO_name="spleen mass";
MO_name="spleen weight to body weight ratio";Map_Type="Linkage";Model="Mendelian";
Test_Base="Chromosome-wise";Significance="Significant";P-value="<0.05";F-Stat="5.52";
Variance="2.94";Dominance_Effect="-0.002";Additive_Effect="0.01
'''
str(example)
# make dictionary of attributes
# keys are:
# QTL_ID,Name,Abbrev,PUBMED_ID,trait_ID,trait,FlankMarkers,
# VTO_name,Map_Type,Significance,P-value,Model,
# Test_Base,Variance, Bayes-value,PTO_name,gene_IDsrc,peak_cM,
# CMO_name,gene_ID,F-Stat,LOD-score,Additive_Effect,
# Dominance_Effect,Likelihood_Ratio,LS-means,Breed,
# trait (duplicate with Name),Variance,Bayes-value,
# F-Stat,LOD-score,Additive_Effect,Dominance_Effect,
# Likelihood_Ratio,LS-means
# deal with poorly formed attributes
if re.search(r'"FlankMarkers";', attr):
attr = re.sub(r'FlankMarkers;', '', attr)
attr_items = re.sub(r'"', '', attr).split(";")
bad_attrs = set()
for attributes in attr_items:
if not re.search(r'=', attributes):
# remove this attribute from the list
bad_attrs.add(attributes)
attr_set = set(attr_items) - bad_attrs
attribute_dict = dict(item.split("=") for item in attr_set)
qtl_num = attribute_dict.get('QTL_ID')
if self.test_mode and int(qtl_num) not in self.test_ids:
continue
# make association between QTL and trait based on taxon
qtl_id = common_name + 'QTL:' + str(qtl_num)
model.addIndividualToGraph(qtl_id, None, self.globaltt['QTL'])
geno.addTaxon(taxon_curie, qtl_id)
#
trait_id = 'AQTLTrait:' + attribute_dict.get('trait_ID')
# if pub is in attributes, add it to the association
pub_id = None
if 'PUBMED_ID' in attribute_dict.keys():
pub_id = attribute_dict.get('PUBMED_ID')
if re.match(r'ISU.*', pub_id):
pub_id = 'AQTLPub:' + pub_id.strip()
reference = Reference(graph, pub_id)
else:
pub_id = 'PMID:' + pub_id.strip()
reference = Reference(
graph, pub_id, self.globaltt['journal article'])
reference.addRefToGraph()
# Add QTL to graph
assoc = G2PAssoc(
graph, self.name, qtl_id, trait_id,
self.globaltt['is marker for'])
assoc.add_evidence(eco_id)
assoc.add_source(pub_id)
if 'P-value' in attribute_dict.keys():
scr = re.sub(r'<', '', attribute_dict.get('P-value'))
if ',' in scr:
scr = re.sub(r',', '.', scr)
if scr.isnumeric():
#.........这里部分代码省略.........
示例13: Decipher
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
# ??? rel is never used
# if category.strip() == 'Confirmed DD gene':
# rel = self.self.globaltt['has phenotype']
# elif category.strip() == 'Probable DD gene':
# rel = self.self.globaltt['has phenotype']
# elif category.strip() == 'Possible DD gene':
# rel = self.self.globaltt['contributes to']
# elif category.strip() == 'Not DD gene':
# # TODO negative annotation
# continue
assoc = G2PAssoc(graph, self.name, allele_id, omim_id)
# TODO 'rel' is assigned to but never used
for p in re.split(r';', pubmed_ids):
p = p.strip()
if p != '':
pmid = 'PMID:' + str(p)
r = Reference(
graph, pmid, self.globaltt['journal article'])
r.addRefToGraph()
assoc.add_source(pmid)
assoc.add_association_to_graph()
else:
# these are unmapped to a disease id.
# note that some match OMIM disease labels
# but the identifiers are just not included.
# TODO consider mapping to OMIM or DOIDs in other ways
LOG.warning(
"No omim id on line %d\n%s", line_counter, str(row))
unmapped_omim_counter += 1
# TODO hpo phenotypes
# since the DDG2P file is not documented,
# I don't know what the HPO annotations are actually about
# are they about the gene? the omim disease? something else?
# So, we wont create associations until this is clarified
if not self.test_mode and limit is not None and line_counter > limit:
break
myzip.close()
LOG.warning(
"gene-disorder associations with no omim id: %d",
unmapped_omim_counter)
LOG.warning("unmapped gene count: %d", unmapped_gene_count)
return
def make_allele_by_consequence(self, consequence, gene_id, gene_symbol):
    """
    Given a "consequence" label that describes a variation type,
    create an anonymous variant of the specified gene as an instance of
    that consequence type.

    When the consequence label cannot be resolved (``self.resolve``
    hands the label back unchanged) a warning is logged and the generic
    ``sequence_variant`` type is used instead.

    Consequence labels seen in the source:

    * Loss of function : Nonsense, frame-shifting indel,
      essential splice site mutation, whole gene deletion or any other
      mutation where functional analysis demonstrates clear reduction
      or loss of function
    * All missense/in frame : Where all the mutations described in the
      data source are either missense or in frame deletions and there
      is no evidence favoring either loss-of-function, activating or
      dominant negative effect
    * Dominant negative : Mutation within one allele of a gene that
      creates a significantly greater deleterious effect on gene
      product function than a monoallelic loss of function mutation
    * Activating : Mutation, usually missense that results in
      a constitutive functional activation of the gene product
    * Increased gene dosage : Copy number variation that increases
      the functional dosage of the gene
    * Cis-regulatory or promotor mutation : Mutation in cis-regulatory
      elements that lies outwith the known transcription unit and
      promotor of the controlled gene
    * Uncertain : Where the exact nature of the mutation is unclear or
      not recorded

    :param consequence: consequence label
    :param gene_id: curie of the gene
    :param gene_symbol: gene symbol, used in the allele label
    :return: allele_id, a blank node identifier (``_:`` prefix)
    """
    type_id = self.resolve(consequence, mandatory=False)
    if type_id == consequence:
        LOG.warning("Consequence type unmapped: %s", str(consequence))
        type_id = self.globaltt['sequence_variant']

    # make the allele: gene id + type id with the colons dropped,
    # prefixed so that it is a BNode
    allele_id = '_:' + ''.join((gene_id, type_id)).replace(':', '')
    allele_label = ' '.join((consequence, 'allele in', gene_symbol))

    self.model.addIndividualToGraph(allele_id, allele_label, type_id)
    self.geno.addAlleleOfGene(allele_id, gene_id)

    return allele_id
示例14: _process_data
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
# properties of the individual patients: sex, family id,
# member/relproband, description descriptions are
# really long and ugly SCREAMING text, so need to clean up
# the control cases are so odd with this labeling scheme;
# but we'll deal with it as-is for now.
description = row[col.index('description')].strip()
short_desc = (description.split(';')[0]).capitalize()
gender = row[col.index('gender')].strip().lower()
affected = row[col.index('affected')].strip()
relprob = row[col.index('relprob')].strip()
if affected == '':
affected = 'unspecified'
elif affected in self.localtt:
affected = self.localtt[affected]
else:
LOG.warning(
'Novel Affected status %s at row: %i of %s',
affected, line_counter, raw)
patient_label = ' '.join((affected, gender, relprob))
if relprob == 'proband':
patient_label = ' '.join((
patient_label.strip(), 'with', short_desc))
else:
patient_label = ' '.join((
patient_label.strip(), 'of proband with', short_desc))
# ############# BUILD THE CELL LINE #############
# Adding the cell line as a typed individual.
cell_line_reagent_id = self.globaltt['cell line']
model.addIndividualToGraph(
cell_line_id, line_label, cell_line_reagent_id)
# add the equivalent id == dna_ref
dna_ref = row[col.index('dna_ref')].strip()
if dna_ref != '' and dna_ref != catalog_id:
equiv_cell_line = 'Coriell:' + dna_ref
# some of the equivalent ids are not defined
# in the source data; so add them
model.addIndividualToGraph(
equiv_cell_line, None, cell_line_reagent_id)
model.addSameIndividual(cell_line_id, equiv_cell_line)
# Cell line derives from patient
geno.addDerivesFrom(cell_line_id, patient_id)
geno.addDerivesFrom(cell_line_id, cell_type)
# Cell line a member of repository
family.addMember(repository, cell_line_id)
cat_remark = row[col.index('cat_remark')].strip()
if cat_remark != '':
model.addDescription(cell_line_id, cat_remark)
# Cell age_at_sampling
# TODO add the age nodes when modeled properly in #78
# if (age != ''):
# this would give a BNode that is an instance of Age.
# but i don't know how to connect
# the age node to the cell line? we need to ask @mbrush
# age_id = '_'+re.sub('\s+','_',age)
# gu.addIndividualToGraph(
示例15: _process_qtls_genetic_location
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addIndividualToGraph [as 别名]
#.........这里部分代码省略.........
if len(range_parts) == 2 and\
range_parts[0] != '' and range_parts[1] != '':
(start, stop) = [
int(float(x.strip())) for x in re.split(r'-', range_cm)]
else:
LOG.info(
"A cM range we can't handle for QTL %s: %s",
qtl_id, range_cm)
elif position_cm != '':
match = re.match(r'([0-9]*\.[0-9]*)', position_cm)
if match is not None:
position_cm = match.group()
start = stop = int(float(position_cm))
# FIXME remove converion to int for start/stop
# when schema can handle floats add in the genetic location
# based on the range
feature.addFeatureStartLocation(
start, chrom_in_build_id, None,
[self.globaltt['FuzzyPosition']])
feature.addFeatureEndLocation(
stop, chrom_in_build_id, None,
[self.globaltt['FuzzyPosition']])
feature.addFeatureToGraph()
# sometimes there's a peak marker, like a rsid.
# we want to add that as a variant of the gene,
# and xref it to the qtl.
dbsnp_id = None
if peak_mark != '' and peak_mark != '.' and \
re.match(r'rs', peak_mark.strip()):
dbsnp_id = 'dbSNP:'+peak_mark.strip()
model.addIndividualToGraph(
dbsnp_id, None,
self.globaltt['sequence_alteration'])
model.addXref(qtl_id, dbsnp_id)
gene_id = gene_id.replace('uncharacterized ', '').strip()
if gene_id is not None and gene_id != '' and gene_id != '.'\
and re.fullmatch(r'[^ ]*', gene_id) is not None:
# we assume if no src is provided and gene_id is an integer,
# then it is an NCBI gene ... (okay, lets crank that back a notch)
if gene_id_src == '' and gene_id.isdigit() and \
gene_id in self.gene_info:
# LOG.info(
# 'Warm & Fuzzy saying %s is a NCBI gene for %s',
# gene_id, common_name)
gene_id_src = 'NCBIgene'
elif gene_id_src == '' and gene_id.isdigit():
LOG.warning(
'Cold & Prickely saying %s is a NCBI gene for %s',
gene_id, common_name)
gene_id_src = 'NCBIgene'
elif gene_id_src == '':
LOG.error(
' "%s" is a NOT NCBI gene for %s', gene_id, common_name)
gene_id_src = None
if gene_id_src == 'NCBIgene':
gene_id = 'NCBIGene:' + gene_id
# we will expect that these will get labels elsewhere
geno.addGene(gene_id, None)
# FIXME what is the right relationship here?
geno.addAffectedLocus(qtl_id, gene_id)