本文整理汇总了Python中dipper.models.Model.Model.addDescription方法的典型用法代码示例。如果您正苦于以下问题：Python Model.addDescription方法的具体用法？Python Model.addDescription怎么用？Python Model.addDescription使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Model.Model的用法示例。
在下文中一共展示了Model.addDescription方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_gene_desc
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def process_gene_desc(self, limit):
    """
    Attach the textual descriptions of WormBase genes to their gene nodes.

    Reads the gzipped, tab-delimited ``gene_desc`` file found under
    ``self.rawdir``.  Rows whose joined text starts with ``#`` are ignored,
    and so is the first remaining row (presumably the column header).
    For every other row the concise description becomes the definition of
    the gene (unless it is the placeholder ``none available``), and each of
    the provisional / automated / detailed / gene-class descriptions is
    added as a description suffixed with its kind in square brackets,
    provided it is not empty, does not start with ``none`` and does not
    repeat the concise description.

    :param limit: outside of test mode, stop once more than this many
        non-comment rows have been read; ``None`` disables the cut-off
    :return: None
    """
    source_path = '/'.join((self.rawdir, self.files['gene_desc']['file']))
    target_graph = self.testgraph if self.testMode else self.graph
    model = Model(target_graph)
    logger.info("Processing Gene descriptions")
    line_counter = 0
    with gzip.open(source_path, 'rb') as zipped:
        rows = csv.reader(
            io.TextIOWrapper(zipped, newline=""),
            delimiter='\t', quotechar='\"')
        for row in rows:
            # comment rows are not counted at all
            if re.match(r'\#', ''.join(row)):
                continue
            line_counter += 1
            # the first counted row is skipped
            if line_counter == 1:
                continue

            (gene_num, _public_name, _molecular_name, concise,
             provisional, detailed, automated, gene_class) = row

            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            gene_id = 'WormBase:' + gene_num
            if concise != 'none available':
                model.addDefinition(gene_id, concise)

            # same order as the original mapping:
            # provisional, automated, detailed, gene class
            candidates = (
                ('provisional', provisional),
                ('automated', automated),
                ('detailed', detailed),
                ('gene class', gene_class),
            )
            for kind, text in candidates:
                # only keep texts that add something beyond the concise one
                if text != concise \
                        and not re.match(r'none', text) and text != '':
                    model.addDescription(
                        gene_id, ' '.join((text, '[' + kind + ']')))

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break
    return
示例2: _process_straininfo
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def _process_straininfo(self, limit):
    """
    Load the strains listed in the comma-separated ``straininfo`` file.

    The first line of the file is handed to ``self.check_header``; every
    following row must have exactly ten columns.  Each strain is added as
    an individual of taxon ``NCBITaxon:10090`` under the identifier
    ``MPD-strain:<mpd_strainid>``, its short name (when present) becomes a
    synonym, and its label is remembered in ``self.idlabel_hash``.
    When a stock number is given, the strain is declared the same
    individual as the JAX or RBRC stock, or, for any other vendor,
    cross-referenced to the URL and to ``<vendor>:<stocknum>``.
    A non-empty panel is recorded as a description.

    :param limit: accepted but not used by this method
    :return: None
    """
    graph = self.testgraph if self.testMode else self.graph
    model = Model(graph)
    logger.info("Processing measurements ...")
    filename = self.files['straininfo']['file']
    raw = '/'.join((self.rawdir, filename))
    taxon = 'NCBITaxon:10090'

    with open(raw, 'r') as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='\"')
        self.check_header(filename, handle.readline())
        for row in reader:
            # e.g. C57BL/6J,J,000664,,7,IN,225,17,,http://jaxmice.jax.org/strain/000664.html
            (strain_name, vendor, stocknum, panel, mpd_strainid,
             _straintype, _n_proj, _n_snp_datasets, mpdshortname,
             url) = row

            if self.testMode and \
                    'MPD:' + str(mpd_strainid) not in self.test_ids:
                continue

            # the strain is an instance of the taxon
            strain_id = 'MPD-strain:' + str(mpd_strainid)
            model.addIndividualToGraph(strain_id, strain_name, taxon)

            short_name = mpdshortname.strip()
            if short_name != '':
                model.addSynonym(strain_id, short_name)
            self.idlabel_hash[strain_id] = strain_name

            # tie the strain to the vendor's stock where one is known
            if stocknum != '':
                if vendor == 'J':
                    model.addSameIndividual(strain_id, 'JAX:' + stocknum)
                elif vendor == 'Rbrc':
                    # drop an embedded "RBRC" so the prefix is not doubled
                    model.addSameIndividual(
                        strain_id, 'RBRC:' + re.sub(r'RBRC', '', stocknum))
                else:
                    if url != '':
                        model.addXref(strain_id, url, True)
                    if vendor != '':
                        model.addXref(
                            strain_id, ':'.join((vendor, stocknum)), True)

            # TODO make the panels as a resource collection
            if panel != '':
                model.addDescription(strain_id, panel + ' [panel]')
    return
示例3: make_triples
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def make_triples(self, source, package):
    """
    Write the triples for one drug record into ``self.graph``.

    :param source: ``'drugbank'`` or ``'drugcentral'``; any other value
        adds nothing to the graph
    :param package: mapping describing the drug.  For ``'drugbank'`` the
        keys ``unii``, ``drugbank_id`` and ``targets`` are read (each target
        providing ``action``, ``uniprot`` and ``name``); for
        ``'drugcentral'`` the keys ``unii``, ``indications`` (each with
        ``snomed_id`` and ``snomed_name``) and ``interactions`` (each with
        ``uniprot``, ``target_name`` and ``target_class``) are read.
    :return: None
    """
    model = Model(self.graph)
    subclass_key = 'subclass_of'

    if source == 'drugbank':
        for target in package['targets']:
            drug = package['unii']
            action = target['action']
            protein = target['uniprot']

            # drug --action--> protein, plus the protein's label and class
            model.addTriple(
                subject_id=drug, predicate_id=action, obj=protein)
            model.addLabel(subject_id=protein, label=target['name'])
            model.addTriple(
                subject_id=protein,
                predicate_id=Model.object_properties[subclass_key],
                obj='SO:0000104')
            # the DrugBank identifier and the UNII denote the same class
            model.addTriple(
                subject_id=package['drugbank_id'],
                predicate_id=Model.object_properties['equivalent_class'],
                obj=drug)
            # every action is declared a sub-property of RO:0002436
            model.addTriple(
                subject_id=action,
                predicate_id='rdfs:subPropertyOf',
                obj='RO:0002436')
            model.addTriple(
                subject_id=drug,
                predicate_id=Model.object_properties[subclass_key],
                obj='CHEBI:23367')
    elif source == 'drugcentral':
        for indication in package['indications']:
            drug = package['unii']
            condition = indication['snomed_id']

            model.addTriple(
                subject_id=drug, predicate_id='RO:0002606', obj=condition)
            model.addTriple(
                subject_id=drug,
                predicate_id=Model.object_properties[subclass_key],
                obj='CHEBI:23367')
            model.addTriple(
                subject_id=condition,
                predicate_id=Model.object_properties[subclass_key],
                obj='DOID:4')
            model.addLabel(
                subject_id=condition, label=indication['snomed_name'])

        for interaction in package['interactions']:
            drug = package['unii']
            protein = interaction['uniprot']

            model.addTriple(
                subject_id=drug, predicate_id='RO:0002436', obj=protein)
            model.addLabel(
                subject_id=protein, label=interaction['target_name'])
            model.addTriple(
                subject_id=drug,
                predicate_id=Model.object_properties[subclass_key],
                obj='CHEBI:23367')
            model.addDescription(
                subject_id=protein,
                description=interaction['target_class'])
            model.addTriple(
                subject_id=protein,
                predicate_id=Model.object_properties[subclass_key],
                obj='SO:0000104')
    return
示例4: make_triples
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def make_triples(self, source, package):
    """
    Write the triples for one drug record into ``self.graph``, resolving
    every ontology term through the ``self.globaltt`` translation table.

    :param source: ``'drugbank'`` or ``'drugcentral'``; any other value
        adds nothing to the graph
    :param package: mapping describing the drug.  For ``'drugbank'`` the
        keys ``unii``, ``drugbank_id`` and ``targets`` are read (each target
        providing ``action``, ``uniprot`` and ``name``); for
        ``'drugcentral'`` the keys ``unii``, ``indications`` (each with
        ``snomed_id`` and ``snomed_name``) and ``interactions`` (each with
        ``uniprot``, ``target_name`` and ``target_class``) are read.
    :return: None
    """
    model = Model(self.graph)

    if source == 'drugbank':
        for target in package['targets']:
            unii = package['unii']
            action = target['action']
            uniprot = target['uniprot']

            # the drug acts on the target through the named action
            model.addTriple(subject_id=unii, predicate_id=action, obj=uniprot)
            model.addLabel(subject_id=uniprot, label=target['name'])
            # the target is a polypeptide
            model.addTriple(
                subject_id=uniprot,
                predicate_id=self.globaltt['subclass_of'],
                obj=self.globaltt['polypeptide'])
            # DrugBank id and UNII are equivalent classes
            model.addTriple(
                subject_id=package['drugbank_id'],
                predicate_id=self.globaltt['equivalent_class'],
                obj=unii)
            # the action specialises "molecularly interacts with"
            model.addTriple(
                subject_id=action,
                predicate_id=self.globaltt['subPropertyOf'],
                obj=self.globaltt['molecularly_interacts_with'])
            # the drug is a molecular entity
            model.addTriple(
                subject_id=unii,
                predicate_id=self.globaltt['subclass_of'],
                obj=self.globaltt['molecular entity'])
    elif source == 'drugcentral':
        for indication in package['indications']:
            unii = package['unii']
            snomed_id = indication['snomed_id']

            model.addTriple(
                subject_id=unii,
                predicate_id=self.globaltt['is substance that treats'],
                obj=snomed_id)
            model.addTriple(
                subject_id=unii,
                predicate_id=self.globaltt['subclass_of'],
                obj=self.globaltt['molecular entity'])
            model.addTriple(
                subject_id=snomed_id,
                predicate_id=self.globaltt['subclass_of'],
                obj=self.globaltt['disease'])
            model.addLabel(
                subject_id=snomed_id, label=indication['snomed_name'])

        for interaction in package['interactions']:
            unii = package['unii']
            uniprot = interaction['uniprot']

            model.addTriple(
                subject_id=unii,
                predicate_id=self.globaltt['molecularly_interacts_with'],
                obj=uniprot)
            model.addLabel(
                subject_id=uniprot, label=interaction['target_name'])
            model.addTriple(
                subject_id=unii,
                predicate_id=self.globaltt['subclass_of'],
                obj=self.globaltt['molecular entity'])
            model.addDescription(
                subject_id=uniprot,
                description=interaction['target_class'])
            model.addTriple(
                subject_id=uniprot,
                predicate_id=self.globaltt['subclass_of'],
                obj=self.globaltt['polypeptide'])
    return
示例5: process_gene_interaction
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def process_gene_interaction(self, limit):
    """
    Add pairwise gene interactions from the gzipped, tab-delimited
    ``gene_interaction`` file to the graph.

    The gene interaction file includes identified interactions,
    that are between two or more gene (products).
    In the case of interactions with >2 genes, this requires creating
    groups of genes that are involved in the interaction.
    From the wormbase help list: In the example WBInteraction000007779
    it would likely be misleading to suggest that lin-12 interacts with
    (suppresses in this case) smo-1 ALONE or that lin-12 suppresses let-60
    ALONE; the observation in the paper; see Table V in paper PMID:15990876
    was that a lin-12 allele (heterozygous lin-12(n941/+)) could suppress
    the "multivulva" phenotype induced synthetically by simultaneous
    perturbation of BOTH smo-1 (by RNAi) AND let-60 (by the n2021 allele).
    So this is necessarily a three-gene interaction.
    Therefore, we can create groups of genes based on their "status" of
    Effector | Effected.

    Only rows with exactly two participants (eleven columns) are loaded;
    the rest are logged and skipped.  Each loaded row becomes an
    ``InteractionAssoc`` identified by the interaction number, with the
    row's summary attached as its description.

    Status: IN PROGRESS

    :param limit: outside of test mode, stop once more than this many
        lines have been read; ``None`` disables the cut-off
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['gene_interaction']['file']))
    graph = self.testgraph if self.testMode else self.graph
    model = Model(graph)
    logger.info("Processing gene interaction associations")

    # interaction type as written in the file -> key of the matching
    # entry in InteractionAssoc.interaction_object_properties
    relation_keys = {
        'Genetic': 'genetically_interacts_with',
        'Physical': 'molecularly_interacts_with',
        'Regulatory': 'regulates',
    }

    line_counter = 0
    with gzip.open(raw, 'rb') as csvfile:
        filereader = csv.reader(
            io.TextIOWrapper(csvfile, newline=""), delimiter='\t',
            quotechar="'")
        for row in filereader:
            line_counter += 1
            if re.match(r'#', ''.join(row)):
                continue
            # a blank or truncated line cannot be unpacked below
            if len(row) < 5:
                logger.warning(
                    "Skipping malformed row on line %d: %s",
                    line_counter, str(row))
                continue

            (interaction_num, interaction_type, _interaction_subtype,
             summary, _citation) = row[0:5]
            interaction_id = 'WormBase:' + interaction_num

            # TODO deal with subtypes
            relation_key = relation_keys.get(interaction_type)
            if relation_key is None:
                # NOTE(review): the association is still created with a
                # None relation, exactly as before; confirm that
                # InteractionAssoc supplies a default in that case.
                interaction_type_id = None
                logger.info(
                    "An interaction type I don't understand %s",
                    interaction_type)
            else:
                interaction_type_id = \
                    InteractionAssoc.interaction_object_properties[
                        relation_key]

            # after the five leading columns, each participant takes three
            num_interactors = (len(row) - 5) / 3
            if num_interactors != 2:
                logger.info(
                    "Skipping interactions with !=2 participants:\n %s",
                    str(row))
                continue

            gene_a_num = row[5]
            gene_b_num = row[8]
            gene_a_id = 'WormBase:' + gene_a_num
            gene_b_id = 'WormBase:' + gene_b_num

            # The other loaders in this file test the *bare* identifier
            # against self.test_ids['gene'] (before the 'WormBase:' prefix
            # is added); the prefixed form used here previously could
            # never match such entries, so test mode skipped every row.
            if self.testMode \
                    and gene_a_num not in self.test_ids['gene'] \
                    and gene_b_num not in self.test_ids['gene']:
                continue

            assoc = InteractionAssoc(
                graph, self.name, gene_a_id, gene_b_id, interaction_type_id)
            assoc.set_association_id(interaction_id)
            assoc.add_association_to_graph()
            assoc_id = assoc.get_association_id()

            # citation is not a pmid or WBref - get this some other way
            model.addDescription(assoc_id, summary)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break
    return
示例6: process_feature_loc
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
#.........这里部分代码省略.........
fid = 'WormBase:'+attribute_dict.get('variation')
flabel = attribute_dict.get('public_name')
sub = attribute_dict.get('substitution')
ins = attribute_dict.get('insertion')
# if it's a variation:
# variation=WBVar00604246;public_name=gk320600;strain=VC20384;substitution=C/T
desc = ''
if sub is not None:
desc = 'substitution='+sub
if ins is not None:
desc = 'insertion='+ins
# keep track of the strains with this variation,
# for later processing
strain_list = attribute_dict.get('strain')
if strain_list is not None:
for s in re.split(r',', strain_list):
if s.strip() not in strain_to_variant_map:
strain_to_variant_map[s.strip()] = set()
strain_to_variant_map[s.strip()].add(fid)
# if feature_type_label == 'RNAi_reagent':
# Target=WBRNAi00096030 1 4942
# this will tell us where the RNAi is actually binding
# target = attribute_dict.get('Target') # TODO unused
# rnai_num = re.split(r' ', target)[0] # TODO unused
# it will be the reagent-targeted-gene that has a position,
# (i think)
# TODO finish the RNAi binding location
name = attribute_dict.get('Name')
polymorphism = attribute_dict.get('polymorphism')
if fid is None:
if name is not None and re.match(r'WBsf', name):
fid = 'WormBase:'+name
name = None
else:
continue
if self.testMode \
and re.sub(r'WormBase:', '', fid) \
not in self.test_ids['gene']+self.test_ids['allele']:
continue
# these really aren't that interesting
if polymorphism is not None:
continue
if name is not None and not re.search(name, fid):
if flabel is None:
flabel = name
else:
model.addSynonym(fid, name)
if desc is not None:
model.addDescription(fid, desc)
alias = attribute_dict.get('Alias')
biotype = attribute_dict.get('biotype')
note = attribute_dict.get('Note')
other_name = attribute_dict.get('other_name')
for n in [alias, other_name]:
if n is not None:
model.addSynonym(fid, other_name)
ftype = self.get_feature_type_by_class_and_biotype(
feature_type_label, biotype)
chr_id = makeChromID(chrom, build_id, 'CHR')
geno.addChromosomeInstance(chrom, build_id, build_num)
feature = Feature(g, fid, flabel, ftype)
feature.addFeatureStartLocation(start, chr_id, strand)
feature.addFeatureEndLocation(start, chr_id, strand)
feature_is_class = False
if feature_type_label == 'gene':
feature_is_class = True
feature.addFeatureToGraph(True, None, feature_is_class)
if note is not None:
model.addDescription(fid, note)
if not self.testMode \
and limit is not None and line_counter > limit:
break
# RNAi reagents:
# I RNAi_primary RNAi_reagent 4184 10232 . + . Target=WBRNAi00001601 1 6049 +;laboratory=YK;history_name=SA:yk326e10
# I RNAi_primary RNAi_reagent 4223 10147 . + . Target=WBRNAi00033465 1 5925 +;laboratory=SV;history_name=MV_SV:mv_G_YK5052
# I RNAi_primary RNAi_reagent 5693 9391 . + . Target=WBRNAi00066135 1 3699 +;laboratory=CH
# TODO TF bindiing sites and network:
# I TF_binding_site_region TF_binding_site 1861 2048 . + . Name=WBsf292777;tf_id=WBTranscriptionFactor000025;tf_name=DAF-16
# I TF_binding_site_region TF_binding_site 3403 4072 . + . Name=WBsf331847;tf_id=WBTranscriptionFactor000703;tf_name=DPL-1
return
示例7: OBAN
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
#.........这里部分代码省略.........
def _is_valid(self):
    """
    Check that the association has a subject, an object and a relation.

    The three parts are examined in that order and the first one found
    to be ``None`` is reported.

    :return: True when all three parts are set
    :raises ValueError: naming the first missing part
    """
    required_parts = (
        ('sub', 'No subject set for this association'),
        ('obj', 'No object set for this association'),
        ('rel', 'No relation set for this association'),
    )
    for attribute, complaint in required_parts:
        if getattr(self, attribute) is None:
            raise ValueError(complaint)
    return True
def _add_basic_association_to_graph(self):
if not self._is_valid():
return
self.graph.addTriple(self.sub, self.rel, self.obj)
if self.assoc_id is None:
self.set_association_id()
self.model.addType(self.assoc_id, self.assoc_types['association'])
self.graph.addTriple(
self.assoc_id, self.object_properties['has_subject'], self.sub)
self.graph.addTriple(
self.assoc_id, self.object_properties['has_object'], self.obj)
self.graph.addTriple(
self.assoc_id, self.object_properties['has_predicate'], self.rel)
if self.description is not None:
self.model.addDescription(self.assoc_id, self.description)
if self.evidence is not None and len(self.evidence) > 0:
for e in self.evidence:
self.graph.addTriple(
self.assoc_id, self.object_properties['has_evidence'], e)
if self.source is not None and len(self.source) > 0:
for s in self.source:
if re.match('http', s):
# TODO assume that the source is a publication?
# use Reference class here
self.graph.addTriple(
self.assoc_id, self.object_properties['has_source'],
s, True)
else:
self.graph.addTriple(
self.assoc_id, self.object_properties['has_source'], s)
if self.provenance is not None and len(self.provenance) > 0:
for p in self.provenance:
self.graph.addTriple(
self.assoc_id, self.object_properties['has_provenance'], p)
if self.date is not None and len(self.date) > 0:
for d in self.date:
self.graph.addTriple(
object_is_literal=True,
subject_id=self.assoc_id,
predicate_id=self.datatype_properties['created_on'],
obj=d)
if self.score is not None:
示例8: __init__
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
class Dataset:
"""
this will produce the metadata about a dataset
following the example laid out here:
http://htmlpreview.github.io/?
https://github.com/joejimbo/HCLSDatasetDescriptions/blob/master/Overview.html#appendix_1
(mind the wrap)
"""
def __init__(self, identifier, title, url, description=None,
             license_url=None, data_rights=None, graph_type=None,
             file_handle=None):
    """
    Create the graph holding the dataset's metadata and add the basic
    statements about the dataset to it.

    :param identifier: dataset identifier; stored prefixed with ``:`` as
        the subject of every statement and written as ``dct:identifier``
    :param title: written as ``dct:title``
    :param url: written as ``foaf:page``
    :param description: optional text added as the dataset's description
    :param license_url: optional, written as ``dct:license``
    :param data_rights: optional, written as a ``dct:rights`` literal
    :param graph_type: ``None`` (an ``RDFGraph`` built from the
        identifier), ``'streamed_graph'`` or ``'rdf_graph'``
    :param file_handle: passed to ``StreamedGraph`` when
        ``graph_type == 'streamed_graph'``
    :raises ValueError: if ``graph_type`` is none of the supported values
    """
    if graph_type is None:
        self.graph = RDFGraph(None, identifier)
    elif graph_type == 'streamed_graph':
        self.graph = StreamedGraph(True, file_handle=file_handle)
    elif graph_type == 'rdf_graph':
        self.graph = RDFGraph()
    else:
        # Previously self.graph was simply left unset here and the next
        # line failed with an AttributeError that hid the real cause.
        raise ValueError(
            "Unsupported graph_type {!r}; expected None, "
            "'streamed_graph' or 'rdf_graph'".format(graph_type))

    self.model = Model(self.graph)
    self.identifier = ':' + identifier
    self.version = None
    self.date_issued = None

    # The date_accessed value is later used as a literal of properties
    # such as dct:issued, which needs to conform to xsd:dateTime format.
    # TODO ... we need to have a talk about typed literals and SPARQL
    self.date_accessed = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    self.citation = set()
    self.license = license_url

    self.model.addType(self.identifier, 'dctypes:Dataset')
    self.graph.addTriple(self.identifier, 'dct:title', title, True)
    self.graph.addTriple(
        self.identifier, 'dct:identifier',
        identifier, object_is_literal=True)
    self.graph.addTriple(self.identifier, 'foaf:page', url)
    # maybe in the future add the logo here (schemaorg:logo)

    # TODO add the licence info
    # FIXME: temporarily optional; can revert to mandatory
    # after all current resources are updated.
    if license_url is not None:
        self.graph.addTriple(
            self.identifier, 'dct:license', license_url)
    else:
        logger.debug('No license provided.')

    if data_rights is not None:
        self.graph.addTriple(
            self.identifier, 'dct:rights',
            data_rights, object_is_literal=True)
    else:
        logger.debug('No rights provided.')

    if description is not None:
        self.model.addDescription(self.identifier, description)
    return
def setVersion(self, date_issued, version_id=None):
    """
    Legacy function...
    should use the other set_* for version and date

    Records the issue date when one is given, then sets the version from
    ``version_id`` if present and otherwise from ``date_issued``.  When
    neither argument is given an error is logged and nothing is changed.

    as of 2016-10-20 used in:
    dipper/sources/HPOAnnotations.py 139:
    dipper/sources/CTD.py 99:
    dipper/sources/BioGrid.py 100:
    dipper/sources/MGI.py 255:
    dipper/sources/EOM.py 93:
    dipper/sources/Coriell.py 200:
    dipper/sources/MMRRC.py 77:

    # TODO set as deprecated

    :param date_issued: issue date of the dataset, or None
    :param version_id: explicit version identifier, or None
    :return: None
    """
    if date_issued is None and version_id is None:
        logger.error("date or version not set!")
        # TODO throw error
        return

    if date_issued is not None:
        self.set_date_issued(date_issued)

    # The version is set exactly once (it used to be set twice when only
    # version_id was supplied) and is logged only after it has been set.
    if version_id is not None:
        self.set_version_by_num(version_id)
    else:
        self.set_version_by_date(date_issued)
    logger.info("set version to %s", self.version)
    return
#.........这里部分代码省略.........
示例9: _process_ortholog_classes
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def _process_ortholog_classes(self, limit=None):
    """
    Add the KEGG orthology classes to the graph.

    Each row of the tab-delimited ``ortholog_classes`` file holds a class
    identifier and a name made of several labels separated by ``;`` or
    ``,``.  The class is added as a gene family labelled with the first
    label.  When there is more than one label, every label (the first
    included) is added as a synonym, the last one is added as the
    description, and any enzyme commission numbers embedded in that
    description are added as ``EC:`` xrefs.

    Triples created:
    <orthology_class_id> is a class
    <orthology_class_id> has label <orthology_symbols>
    <orthology_class_id> has description <orthology_description>

    :param limit: outside of test mode, stop once the reader's line
        number exceeds this value; ``None`` disables the cut-off
    :return: None
    """
    LOG.info("Processing ortholog classes")
    graph = self.testgraph if self.test_mode else self.graph
    model = Model(graph)
    raw = '/'.join((self.rawdir, self.files['ortholog_classes']['file']))

    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in reader:
            (orthology_class_id, orthology_class_name) = row

            if self.test_mode and orthology_class_id \
                    not in self.test_ids['orthology_classes']:
                continue

            # The orthology class is essentially a KEGG gene ID
            # that is species agnostic; the first label names it.
            labels = re.split(r'[;,]', orthology_class_name)
            orthology_class_id = 'KEGG-' + orthology_class_id.strip()
            model.addClassToGraph(
                orthology_class_id, labels[0], self.globaltt['gene_family'])

            if len(labels) > 1:
                # todo skip the first
                for label in labels:
                    model.addSynonym(orthology_class_id, label.strip())

                description = labels[-1]
                model.addDescription(orthology_class_id, description)

                # EC numbers look like EC:1.2.99.5; sometimes there are
                # two, like [EC:1.3.5.1 1.3.5.4], and they can carry a
                # dash, like EC:1.10.3.-
                for ec_number in re.findall(
                        r'((?:\d+|\.|-){5,7})', description):
                    model.addXref(orthology_class_id, 'EC:' + ec_number)

            if not self.test_mode and limit is not None \
                    and reader.line_num > limit:
                break

    LOG.info("Done with ortholog classes")
示例10: process_gaf
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def process_gaf(self, file, limit, id_map=None, eco_map=None):
if self.test_mode:
graph = self.testgraph
else:
graph = self.graph
model = Model(graph)
geno = Genotype(graph)
LOG.info("Processing Gene Associations from %s", file)
line_counter = 0
uniprot_hit = 0
uniprot_miss = 0
if 7955 in self.tax_ids:
zfin = ZFIN(self.graph_type, self.are_bnodes_skized)
if 6239 in self.tax_ids:
wbase = WormBase(self.graph_type, self.are_bnodes_skized)
with gzip.open(file, 'rb') as csvfile:
filereader = csv.reader(
io.TextIOWrapper(csvfile, newline=""), delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
# comments start with exclamation
if re.match(r'!', ''.join(row)):
continue
if len(row) > 17 or len(row) < 15:
LOG.warning(
"Wrong number of columns %i, expected 15 or 17\n%s",
len(row), row)
continue
if 17 > len(row) >= 15:
row += [""] * (17 - len(row))
(dbase,
gene_num,
gene_symbol,
qualifier,
go_id,
ref,
eco_symbol,
with_or_from,
aspect,
gene_name,
gene_synonym,
object_type,
taxon,
date,
assigned_by,
annotation_extension,
gene_product_form_id) = row
# test for required fields
if (dbase == '' or gene_num == '' or gene_symbol == '' or
go_id == '' or ref == '' or eco_symbol == '' or
aspect == '' or object_type == '' or taxon == '' or
date == '' or assigned_by == ''):
LOG.error(
"Missing required part of annotation on row %d:\n"+'\t'
.join(row), line_counter)
continue
# deal with qualifier NOT, contributes_to, colocalizes_with
if re.search(r'NOT', qualifier):
continue
if dbase in self.localtt:
dbase = self.localtt[dbase]
uniprotid = None
gene_id = None
if dbase == 'UniProtKB':
if id_map is not None and gene_num in id_map:
gene_id = id_map[gene_num]
uniprotid = ':'.join((dbase, gene_num))
(dbase, gene_num) = gene_id.split(':')
uniprot_hit += 1
else:
# LOG.warning(
# "UniProt id %s is without a 1:1 mapping to entrez/ensembl",
# gene_num)
uniprot_miss += 1
continue
else:
gene_num = gene_num.split(':')[-1] # last
gene_id = ':'.join((dbase, gene_num))
if self.test_mode and not(
re.match(r'NCBIGene', gene_id) and
int(gene_num) in self.test_ids):
continue
model.addClassToGraph(gene_id, gene_symbol)
if gene_name != '':
model.addDescription(gene_id, gene_name)
if gene_synonym != '':
for syn in re.split(r'\|', gene_synonym):
model.addSynonym(gene_id, syn.strip())
if re.search(r'\|', taxon):
#.........这里部分代码省略.........
示例11: process_gaf
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
def process_gaf(self, file, limit, id_map=None):
if self.testMode:
g = self.testgraph
else:
g = self.graph
model = Model(g)
geno = Genotype(g)
logger.info("Processing Gene Associations from %s", file)
line_counter = 0
if 7955 in self.tax_ids:
zfin = ZFIN(self.graph_type, self.are_bnodes_skized)
elif 6239 in self.tax_ids:
wbase = WormBase(self.graph_type, self.are_bnodes_skized)
with gzip.open(file, 'rb') as csvfile:
filereader = csv.reader(io.TextIOWrapper(csvfile, newline=""),
delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
# comments start with exclamation
if re.match(r'!', ''.join(row)):
continue
(db, gene_num, gene_symbol, qualifier, go_id, ref, eco_symbol,
with_or_from, aspect, gene_name, gene_synonym, object_type,
taxon, date, assigned_by, annotation_extension,
gene_product_form_id) = row
# test for required fields
if (db == '' or gene_num == '' or gene_symbol == '' or
go_id == '' or ref == '' or eco_symbol == '' or
aspect == '' or object_type == '' or taxon == '' or
date == '' or assigned_by == ''):
logger.error(
"Missing required part of annotation " +
"on row %d:\n"+'\t'.join(row),
line_counter)
continue
# deal with qualifier NOT, contributes_to, colocalizes_with
if re.search(r'NOT', qualifier):
continue
db = self.clean_db_prefix(db)
uniprotid = None
gene_id = None
if db == 'UniProtKB':
mapped_ids = id_map.get(gene_num)
if id_map is not None and mapped_ids is not None:
if len(mapped_ids) == 1:
gene_id = mapped_ids[0]
uniprotid = ':'.join((db, gene_num))
gene_num = re.sub(r'\w+\:', '', gene_id)
elif len(mapped_ids) > 1:
# logger.warning(
# "Skipping gene id mapped for >1 gene %s -> %s",
# gene_num, str(mapped_ids))
continue
else:
continue
elif db == 'MGI':
gene_num = re.sub(r'MGI:', '', gene_num)
gene_id = ':'.join((db, gene_num))
gene_id = re.sub(r'MGI\:MGI\:', 'MGI:', gene_id)
else:
gene_id = ':'.join((db, gene_num))
if self.testMode \
and not(
re.match(r'NCBIGene', gene_id) and
int(gene_num) in self.test_ids):
continue
model.addClassToGraph(gene_id, gene_symbol)
if gene_name != '':
model.addDescription(gene_id, gene_name)
if gene_synonym != '':
for s in re.split(r'\|', gene_synonym):
model.addSynonym(gene_id, s.strip())
if re.search(r'\|', taxon):
# TODO add annotations with >1 taxon
logger.info(">1 taxon (%s) on line %d. skipping", taxon,
line_counter)
else:
tax_id = re.sub(r'taxon:', 'NCBITaxon:', taxon)
geno.addTaxon(tax_id, gene_id)
assoc = Assoc(g, self.name)
assoc.set_subject(gene_id)
assoc.set_object(go_id)
eco_id = self.map_go_evidence_code_to_eco(eco_symbol)
if eco_id is not None:
assoc.add_evidence(eco_id)
refs = re.split(r'\|', ref)
for r in refs:
#.........这里部分代码省略.........
示例12: OBAN
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
class Assoc:
"""
A base class for OBAN (Monarch)-style associations,
to enable attribution of source and evidence
on statements.
"""
def __init__(self, graph, definedby, sub=None, obj=None, pred=None):
    """
    Set up an association over ``graph``.

    :param graph: the graph to write to; must be a ``Graph`` instance
    :param definedby: stored as ``self.definedby``
    :param sub: subject of the association, may be set later
    :param obj: object of the association, may be set later
    :param pred: predicate of the association, stored as ``self.rel``
    :raises ValueError: if ``graph`` is not a ``Graph``
    """
    if not isinstance(graph, Graph):
        raise ValueError("{} is not a graph".format(graph))

    self.graph = graph
    self.model = Model(self.graph)

    # lookup tables borrowed from the graph
    self.globaltt = self.graph.globaltt
    self.globaltcid = self.graph.globaltcid
    self.curie_map = self.graph.curie_map

    # core parts of the association
    self.definedby = definedby
    self.sub, self.obj, self.rel = sub, obj, pred
    self.assoc_id = None
    self.description = None

    # attribution; provenance is going to be used for the refactored
    # evidence/provenance
    self.source = []
    self.evidence = []
    self.date = []
    self.provenance = []

    self.score = None
    self.score_type = None
    self.score_unit = None
    return
def _is_valid(self):
    """
    Check that the association is complete and well formed.

    Subject, object and predicate must all be set, and the subject and
    the predicate must each start with a prefix found in
    ``self.curie_map`` or with one of ``_``, ``http``, ``https``, ``ftp``.
    The object's prefix is not checked.

    :return: True when every check passes
    :raises ValueError: describing the first failed check, with the
        subject, predicate and object interpolated into the message
    """
    # ValueError does not apply %-formatting to its arguments, so the
    # message has to be built before it is raised.
    triple = '<%s> <%s> <%s>' % (self.sub, self.rel, self.obj)

    # check if sub/obj/rel are none...raise error
    if self.sub is None:
        raise ValueError('No subject set for this association ' + triple)
    if self.obj is None:
        raise ValueError('No object set for this association ' + triple)
    if self.rel is None:
        raise ValueError('No predicate set for this association ' + triple)

    # Are subject & predicate, either a curie or IRI
    accepted_schemes = ('_', 'http', 'https', 'ftp')
    for role, term in (('Subject', self.sub), ('Predicate', self.rel)):
        prefix = term.split(':')[0]
        if prefix not in self.curie_map \
                and prefix not in accepted_schemes:
            raise ValueError(
                'Invalid %s for this association %s' % (role, triple))
    return True
def add_association_to_graph(self):
if not self._is_valid():
return
self.graph.addTriple(self.sub, self.rel, self.obj)
if self.assoc_id is None:
self.set_association_id()
assert self.assoc_id is not None
self.model.addType(self.assoc_id, self.model.globaltt['association'])
self.graph.addTriple(
self.assoc_id, self.globaltt['association has subject'], self.sub)
self.graph.addTriple(
self.assoc_id, self.globaltt['association has object'], self.obj)
self.graph.addTriple(
self.assoc_id, self.globaltt['association has predicate'], self.rel)
if self.description is not None:
self.model.addDescription(self.assoc_id, self.description)
if self.evidence is not None and len(self.evidence) > 0:
for evi in self.evidence:
self.graph.addTriple(self.assoc_id, self.globaltt['has evidence'], evi)
if self.source is not None and len(self.source) > 0:
#.........这里部分代码省略.........
示例13: _process_data
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
#.........这里部分代码省略.........
# Make the sex-qualified genotype,
# which is what the phenotype is associated with
sex_qualified_genotype_id = \
self.make_id((
colony_id + phenotyping_center + zygosity +
strain_accession_id + sex))
sex_qualified_genotype_label = genotype_name + ' (' + sex + ')'
sq_type_id = self.resolve(sex, False)
if sq_type_id == sex:
sq_type_id = self.globaltt['intrinsic_genotype']
LOG.warning(
"Unknown sex qualifier %s, adding as intrinsic_genotype",
sex)
geno.addGenotype(
sex_qualified_genotype_id, sex_qualified_genotype_label, sq_type_id)
geno.addParts(
genotype_id, sex_qualified_genotype_id,
self.globaltt['has_variant_part'])
if genomic_background_id is not None and genomic_background_id != '':
# Add the taxon to the genomic_background_id
geno.addTaxon(taxon_id, genomic_background_id)
else:
# add it as the genomic background
geno.addTaxon(taxon_id, genotype_id)
# ############# BUILD THE G2P ASSOC #############
# from an old email dated July 23 2014:
# Phenotypes associations are made to
# imits colony_id+center+zygosity+gender
phenotype_id = mp_term_id
# it seems that sometimes phenotype ids are missing.
# indicate here
if phenotype_id is None or phenotype_id == '':
LOG.warning(
"No phenotype id specified for row %d: %s",
line_counter, str(row))
continue
# hard coded ECO code
eco_id = self.globaltt['mutant phenotype evidence']
# the association comes as a result of a g2p from
# a procedure in a pipeline at a center and parameter tested
assoc = G2PAssoc(
graph, self.name, sex_qualified_genotype_id, phenotype_id)
assoc.add_evidence(eco_id)
# assoc.set_score(float(p_value))
# TODO add evidence instance using
# pipeline_stable_id +
# procedure_stable_id +
# parameter_stable_id
assoc.add_association_to_graph()
assoc_id = assoc.get_association_id()
model._addSexSpecificity(assoc_id, self.resolve(sex))
# add a free-text description
try:
description = ' '.join((
mp_term_name, 'phenotype determined by', phenotyping_center,
'in an', procedure_name, 'assay where', parameter_name.strip(),
'was measured with an effect_size of',
str(round(float(effect_size), 5)),
'(p =', "{:.4e}".format(float(p_value)), ').'))
except ValueError:
description = ' '.join((
mp_term_name, 'phenotype determined by', phenotyping_center,
'in an', procedure_name, 'assay where', parameter_name.strip(),
'was measured with an effect_size of', str(effect_size),
'(p =', "{0}".format(p_value), ').'))
study_bnode = self._add_study_provenance(
phenotyping_center, colony_raw, project_fullname, pipeline_name,
pipeline_stable_id, procedure_stable_id, procedure_name,
parameter_stable_id, parameter_name, statistical_method,
resource_name, line_counter)
evidence_line_bnode = self._add_evidence(
assoc_id, eco_id, p_value, percentage_change, effect_size,
study_bnode)
self._add_assertion_provenance(assoc_id, evidence_line_bnode)
model.addDescription(evidence_line_bnode, description)
# resource_id = resource_name
# assoc.addSource(graph, assoc_id, resource_id)
if not self.test_mode and limit is not None and line_counter > limit:
break
return
示例14: _transform_entry
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
#.........这里部分代码省略.........
ncbifeature = self._get_mapped_gene_ids(e['entry'], g)
if len(ncbifeature) == 1:
feature_id = 'NCBIGene:'+str(ncbifeature[0])
# add this feature as a cause for the omim disease
# TODO SHOULD I EVEN DO THIS HERE?
assoc = G2PAssoc(g, self.name, feature_id, omimid)
assoc.add_association_to_graph()
elif len(ncbifeature) > 1:
logger.info(
"Its ambiguous when %s maps to >1 gene id: %s",
omimid, str(ncbifeature))
else: # no ncbi feature, make an anonymous one
feature_id = self._make_anonymous_feature(str(omimnum))
feature_label = abbrev
elif omimtype == Genotype.genoparts['gene']:
feature_id = omimid
is_gene = True
else:
# 158900 falls into this category
feature_id = self._make_anonymous_feature(str(omimnum))
if abbrev is not None:
feature_label = abbrev
omimtype = \
Genotype.genoparts[
'heritable_phenotypic_marker']
if feature_id is not None:
if 'comments' in genemap:
# add a comment to this feature
comment = genemap['comments']
if comment.strip() != '':
model.addDescription(feature_id, comment)
if 'cytoLocation' in genemap:
cytoloc = genemap['cytoLocation']
# parse the cytoloc.
# add this omim thing as
# a subsequence of the cytofeature
# 18p11.3-p11.2
# FIXME
# add the other end of the range,
# but not sure how to do that
# not sure if saying subsequence of feature
# is the right relationship
f = Feature(g, feature_id, feature_label, omimtype)
if 'chromosomeSymbol' in genemap:
chrom_num = str(genemap['chromosomeSymbol'])
chrom = makeChromID(chrom_num, tax_num, 'CHR')
geno.addChromosomeClass(
chrom_num, tax_id, tax_label)
# add the positional information, if available
fstart = fend = -1
if 'chromosomeLocationStart' in genemap:
fstart = genemap['chromosomeLocationStart']
if 'chromosomeLocationEnd' in genemap:
fend = genemap['chromosomeLocationEnd']
if fstart >= 0:
# make the build-specific chromosome
chrom_in_build = makeChromID(chrom_num,
build_num,
'MONARCH')
# then, add the chromosome instance
# (from the given build)
示例15: _process_data
# 需要导入模块: from dipper.models.Model import Model [as 别名]
# 或者: from dipper.models.Model.Model import addDescription [as 别名]
#.........这里部分代码省略.........
patient_label.strip(), 'with', short_desc))
else:
patient_label = ' '.join((
patient_label.strip(), 'of proband with', short_desc))
# ############# BUILD THE CELL LINE #############
# Adding the cell line as a typed individual.
cell_line_reagent_id = self.globaltt['cell line']
model.addIndividualToGraph(
cell_line_id, line_label, cell_line_reagent_id)
# add the equivalent id == dna_ref
dna_ref = row[col.index('dna_ref')].strip()
if dna_ref != '' and dna_ref != catalog_id:
equiv_cell_line = 'Coriell:' + dna_ref
# some of the equivalent ids are not defined
# in the source data; so add them
model.addIndividualToGraph(
equiv_cell_line, None, cell_line_reagent_id)
model.addSameIndividual(cell_line_id, equiv_cell_line)
# Cell line derives from patient
geno.addDerivesFrom(cell_line_id, patient_id)
geno.addDerivesFrom(cell_line_id, cell_type)
# Cell line a member of repository
family.addMember(repository, cell_line_id)
cat_remark = row[col.index('cat_remark')].strip()
if cat_remark != '':
model.addDescription(cell_line_id, cat_remark)
# Cell age_at_sampling
# TODO add the age nodes when modeled properly in #78
# if (age != ''):
# this would give a BNode that is an instance of Age.
# but i don't know how to connect
# the age node to the cell line? we need to ask @mbrush
# age_id = '_'+re.sub('\s+','_',age)
# gu.addIndividualToGraph(
# graph,age_id,age,self.globaltt['age'])
# gu.addTriple(
# graph,age_id,self.globaltt['has measurement value'],age,
# True)
# ############# BUILD THE PATIENT #############
# Add the patient ID as an individual.
model.addPerson(patient_id, patient_label)
# TODO map relationship to proband as a class
# (what ontology?)
# Add race of patient
# FIXME: Adjust for subcategories based on ethnicity field
# EDIT: There are 743 different entries for ethnicity...
# Too many to map?
# Add ethnicity as literal in addition to the mapped race?
# Adjust the ethnicity txt (if using)
# to initial capitalization to remove ALLCAPS
# TODO race should go into the individual's background
# and be abstracted out to the Genotype class; punting for now.
# if race != '':