本文整理汇总了Python中dipper.utils.GraphUtils.GraphUtils.addDescription方法的典型用法代码示例。如果您正苦于以下问题:Python GraphUtils.addDescription方法的具体用法?Python GraphUtils.addDescription怎么用?Python GraphUtils.addDescription使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.utils.GraphUtils.GraphUtils的用法示例。
在下文中一共展示了GraphUtils.addDescription方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_gene_desc
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
def process_gene_desc(self, limit):
    """
    Add gene definitions and descriptions from the gzipped,
    tab-delimited 'gene_desc' file to the graph.

    Rows whose joined text starts with '#' are skipped without being
    counted, and the first counted row is skipped as well.
    For every remaining gene:
      * the concise description becomes the definition,
        unless it is the literal 'none available';
      * the provisional, automated, detailed and gene class descriptions
        are each added as a description with a trailing '[<kind>]' tag,
        unless the text is empty, starts with 'none',
        or is identical to the concise description.

    :param limit: when not None (and not in test mode), stop after the
        number of counted rows exceeds this value
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['gene_desc']['file']))
    g = self.testgraph if self.testMode else self.graph
    gu = GraphUtils(curie_map.get())
    logger.info("Processing Gene descriptions")
    line_counter = 0
    # geno = Genotype(g)  # TODO unused
    with gzip.open(raw, 'rb') as csvfile:
        filereader = csv.reader(
            io.TextIOWrapper(csvfile, newline=""), delimiter='\t',
            quotechar='\"')
        for row in filereader:
            # comment rows are not counted
            if re.match(r'\#', ''.join(row)):
                continue
            line_counter += 1
            if line_counter == 1:
                # the first counted row is not data
                continue

            (gene_num, public_name, molecular_name, concise_description,
             provisional_description, detailed_description,
             automated_description, gene_class_description) = row

            if self.testMode and gene_num not in self.test_ids['gene']:
                continue

            gene_id = 'WormBase:'+gene_num
            if concise_description != 'none available':
                gu.addDefinition(g, gene_id, concise_description)

            # (tag, text) pairs, in the order they are added to the graph
            tagged_descriptions = (
                ('provisional', provisional_description),
                ('automated', automated_description),
                ('detailed', detailed_description),
                ('gene class', gene_class_description),
            )
            for tag, text in tagged_descriptions:
                # only keep text that says something new:
                # not a repeat of the concise description,
                # not blank, and not a 'none...' placeholder
                if text != concise_description and text != '' \
                        and not re.match(r'none', text):
                    gu.addDescription(
                        g, gene_id, ' '.join((text, '['+tag+']')))

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break
    return
示例2: _process_straininfo
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
def _process_straininfo(self, limit):
    """
    Add the strains listed in the comma-delimited 'straininfo' file
    to the graph.

    The first line of the file is read and discarded as the header.
    For each remaining row the strain is added as an individual typed by
    the taxon 'NCBITaxon:10090', its short name (if any) is added as a
    synonym, and its label is remembered in self.idlabel_hash.
    When a stock number is present, the strain is made the same
    individual as the 'JAX:' or 'RBRC:' stock for vendors 'J' and 'Rbrc';
    for any other vendor the url and '<vendor>:<stocknum>' are added
    as xrefs. A non-empty panel is added as a '[panel]' description.

    :param limit: accepted for interface compatibility;
        not used by this method
    :return: None
    """
    # line_counter = 0  # TODO unused
    g = self.testgraph if self.testMode else self.graph
    logger.info("Processing measurements ...")
    raw = '/'.join((self.rawdir, self.files['straininfo']['file']))
    tax_id = 'NCBITaxon:10090'
    gu = GraphUtils(curie_map.get())
    with open(raw, 'r') as f:
        reader = csv.reader(f, delimiter=',', quotechar='\"')
        f.readline()  # read the header row; skip
        for row in reader:
            # example row:
            # C57BL/6J,J,000664,,7,IN,225,17,,http://jaxmice.jax.org/strain/000664.html
            (strain_name, vendor, stocknum, panel, mpd_strainid,
             straintype, n_proj, n_snp_datasets, mpdshortname, url) = row

            if self.testMode and \
                    'MPD:'+str(mpd_strainid) not in self.test_ids:
                continue

            # create the strain as an instance of the taxon
            strain_id = 'MPD-strain:'+str(mpd_strainid)
            gu.addIndividualToGraph(g, strain_id, strain_name, tax_id)

            short_name = mpdshortname.strip()
            if short_name != '':
                gu.addSynonym(g, strain_id, short_name)
            self.idlabel_hash[strain_id] = strain_name

            # make it equivalent to the vendor+stock
            if stocknum != '':
                if vendor == 'J':
                    gu.addSameIndividual(g, strain_id, 'JAX:'+stocknum)
                elif vendor == 'Rbrc':
                    # reiken
                    gu.addSameIndividual(
                        g, strain_id,
                        'RBRC:'+re.sub(r'RBRC', '', stocknum))
                else:
                    if url != '':
                        gu.addXref(g, strain_id, url, True)
                    if vendor != '':
                        vendor_stock = ':'.join((vendor, stocknum))
                        gu.addXref(g, strain_id, vendor_stock, True)

            # add the panel information
            if panel != '':
                gu.addDescription(g, strain_id, panel+' [panel]')
            # TODO make the panels as a resource collection
    return
示例3: _process_ortholog_classes
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
def _process_ortholog_classes(self, limit=None):
    """
    This method adds the KEGG orthology classes to the graph.

    Each row of the tab-delimited 'ortholog_classes' file holds an
    orthology class id and a ';'-separated name field. The first part of
    the name is used as the class label. When there is more than one
    part, the remaining parts are added as synonyms (the label itself is
    not repeated as a synonym) and the last part is also added
    as the description.

    Triples created:
    <orthology_class_id> is a class
    <orthology_class_id> has label <orthology_symbols>
    <orthology_class_id> has description <orthology_description>

    :param limit: when not None (and not in test mode), stop after the
        number of rows read exceeds this value
    :return: None
    """
    logger.info("Processing ortholog classes")
    g = self.testgraph if self.testMode else self.graph
    line_counter = 0
    gu = GraphUtils(curie_map.get())
    raw = '/'.join((self.rawdir, self.files['ortholog_classes']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            (orthology_class_id, orthology_class_name) = row

            if self.testMode and \
                    orthology_class_id not in \
                    self.test_ids['ortholog_classes']:
                continue

            # FIXME: What's the proper route for this?
            # The orthology class is essentially a KEGG gene ID
            # that is species agnostic.
            # Add the ID and label as a class.
            # Would it be considered a gene as well?
            other_labels = orthology_class_name.split(';')
            # the first one is the label we'll use
            orthology_label = other_labels[0]
            orthology_class_id = 'KEGG-'+orthology_class_id.strip()
            orthology_type = OrthologyAssoc.terms['gene_family']
            gu.addClassToGraph(
                g, orthology_class_id, orthology_label, orthology_type)

            if len(other_labels) > 1:
                # add the rest as synonyms; the first entry is already
                # the class label, so it is skipped here
                for s in other_labels[1:]:
                    gu.addSynonym(g, orthology_class_id, s)
                # add the last one as the description
                gu.addDescription(g, orthology_class_id, other_labels[-1])

            if (not self.testMode) and \
                    (limit is not None and line_counter > limit):
                break
    logger.info("Done with ortholog classes")
    return
示例4: process_gene_interaction
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
def process_gene_interaction(self, limit):
    """
    The gene interaction file includes identified interactions,
    that are between two or more gene (products).
    In the case of interactions with >2 genes, this requires creating
    groups of genes that are involved in the interaction.
    From the wormbase help list: In the example WBInteraction000007779
    it would likely be misleading to suggest that lin-12 interacts with
    (suppresses in this case) smo-1 ALONE or that lin-12 suppresses let-60
    ALONE; the observation in the paper; see Table V in paper PMID:15990876
    was that a lin-12 allele (heterozygous lin-12(n941/+)) could suppress
    the "multivulva" phenotype induced synthetically by simultaneous
    perturbation of BOTH smo-1 (by RNAi) AND let-60 (by the n2021 allele).
    So this is necessarily a three-gene interaction.
    Therefore, we can create groups of genes based on their "status" of
    Effector | Effected.

    Status: IN PROGRESS

    What is currently implemented: rows whose joined text starts with
    '#' are skipped; only rows describing exactly two participants
    (three columns per participant after the first five columns) are
    turned into an InteractionAssoc, and the row's summary is added as
    the description of that association. An interaction type other than
    'Genetic', 'Physical' or 'Regulatory' is logged and the association
    is still created, with None as its relationship.

    :param limit: when not None (and not in test mode), stop after the
        number of rows read exceeds this value
    :return: None
    """
    raw = '/'.join((self.rawdir, self.files['gene_interaction']['file']))
    g = self.testgraph if self.testMode else self.graph
    gu = GraphUtils(curie_map.get())
    logger.info("Processing gene interaction associations")

    # interaction type as written in the file
    # -> key into InteractionAssoc.interaction_object_properties
    property_key_by_type = {
        'Genetic': 'genetically_interacts_with',
        'Physical': 'molecularly_interacts_with',
        'Regulatory': 'regulates',
    }

    line_counter = 0
    with gzip.open(raw, 'rb') as csvfile:
        filereader = csv.reader(
            io.TextIOWrapper(csvfile, newline=""), delimiter='\t',
            quotechar="'")
        for row in filereader:
            line_counter += 1
            if re.match(r'#', ''.join(row)):
                continue

            (interaction_num, interaction_type, interaction_subtype,
             summary, citation) = row[0:5]
            interaction_id = 'WormBase:'+interaction_num

            # TODO deal with subtypes
            interaction_type_id = None
            property_key = property_key_by_type.get(interaction_type)
            if property_key is not None:
                interaction_type_id = \
                    InteractionAssoc.interaction_object_properties[
                        property_key]
            else:
                logger.info(
                    "An interaction type I don't understand %s",
                    interaction_type)

            # each participant takes three columns after the first five
            num_interactors = (len(row) - 5) / 3
            if num_interactors != 2:
                logger.info(
                    "Skipping interactions with !=2 participants:\n %s",
                    str(row))
                continue

            gene_a_id = 'WormBase:'+row[5]
            gene_b_id = 'WormBase:'+row[8]

            # in test mode, skip only when neither id is in the test set
            if self.testMode \
                    and gene_a_id not in self.test_ids['gene'] \
                    and gene_b_id not in self.test_ids['gene']:
                continue

            assoc = InteractionAssoc(
                self.name, gene_a_id, gene_b_id, interaction_type_id)
            assoc.set_association_id(interaction_id)
            assoc.add_association_to_graph(g)
            assoc_id = assoc.get_association_id()
            # citation is not a pmid or WBref - get this some other way
            gu.addDescription(g, assoc_id, summary)

            if not self.testMode \
                    and limit is not None and line_counter > limit:
                break
    return
示例5: process_feature_loc
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
#.........这里部分代码省略.........
fid = 'WormBase:'+attribute_dict.get('variation')
flabel = attribute_dict.get('public_name')
sub = attribute_dict.get('substitution')
ins = attribute_dict.get('insertion')
# if it's a variation:
# variation=WBVar00604246;public_name=gk320600;strain=VC20384;substitution=C/T
desc = ''
if sub is not None:
desc = 'substitution='+sub
if ins is not None:
desc = 'insertion='+ins
# keep track of the strains with this variation,
# for later processing
strain_list = attribute_dict.get('strain')
if strain_list is not None:
for s in re.split(r',', strain_list):
if s.strip() not in strain_to_variant_map:
strain_to_variant_map[s.strip()] = set()
strain_to_variant_map[s.strip()].add(fid)
# if feature_type_label == 'RNAi_reagent':
# Target=WBRNAi00096030 1 4942
# this will tell us where the RNAi is actually binding
# target = attribute_dict.get('Target') # TODO unused
# rnai_num = re.split(r' ', target)[0] # TODO unused
# it will be the reagent-targeted-gene that has a position,
# (i think)
# TODO finish the RNAi binding location
name = attribute_dict.get('Name')
polymorphism = attribute_dict.get('polymorphism')
if fid is None:
if name is not None and re.match(r'WBsf', name):
fid = 'WormBase:'+name
name = None
else:
continue
if self.testMode \
and re.sub(r'WormBase:', '', fid) \
not in self.test_ids['gene']+self.test_ids['allele']:
continue
# these really aren't that interesting
if polymorphism is not None:
continue
if name is not None and not re.search(name, fid):
if flabel is None:
flabel = name
else:
gu.addSynonym(g, fid, name)
if desc is not None:
gu.addDescription(g, fid, desc)
alias = attribute_dict.get('Alias')
biotype = attribute_dict.get('biotype')
note = attribute_dict.get('Note')
other_name = attribute_dict.get('other_name')
for n in [alias, other_name]:
if n is not None:
gu.addSynonym(g, fid, other_name)
ftype = self.get_feature_type_by_class_and_biotype(
feature_type_label, biotype)
chr_id = makeChromID(chrom, build_id, 'CHR')
geno.addChromosomeInstance(chrom, build_id, build_num)
f = Feature(fid, flabel, ftype)
f.addFeatureStartLocation(start, chr_id, strand)
f.addFeatureEndLocation(start, chr_id, strand)
feature_is_class = False
if feature_type_label == 'gene':
feature_is_class = True
f.addFeatureToGraph(g, True, None, feature_is_class)
if note is not None:
gu.addDescription(g, fid, note)
if not self.testMode \
and limit is not None and line_counter > limit:
break
# RNAi reagents:
# I RNAi_primary RNAi_reagent 4184 10232 . + . Target=WBRNAi00001601 1 6049 +;laboratory=YK;history_name=SA:yk326e10
# I RNAi_primary RNAi_reagent 4223 10147 . + . Target=WBRNAi00033465 1 5925 +;laboratory=SV;history_name=MV_SV:mv_G_YK5052
# I RNAi_primary RNAi_reagent 5693 9391 . + . Target=WBRNAi00066135 1 3699 +;laboratory=CH
# TODO TF bindiing sites and network:
# I TF_binding_site_region TF_binding_site 1861 2048 . + . Name=WBsf292777;tf_id=WBTranscriptionFactor000025;tf_name=DAF-16
# I TF_binding_site_region TF_binding_site 3403 4072 . + . Name=WBsf331847;tf_id=WBTranscriptionFactor000703;tf_name=DPL-1
return
示例6: _process_data
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
#.........这里部分代码省略.........
' '.join(
(patient_label.strip(), 'with', short_desc))
else:
patient_label = \
' '.join(
(patient_label.strip(), 'of proband with',
short_desc))
# ############# BUILD THE CELL LINE #############
# Adding the cell line as a typed individual.
cell_line_reagent_id = 'CLO:0000031'
gu.addIndividualToGraph(
g, cell_line_id, line_label, cell_line_reagent_id)
# add the equivalent id == dna_ref
if dna_ref != '' and dna_ref != catalog_id:
equiv_cell_line = 'Coriell:'+dna_ref
# some of the equivalent ids are not defined
# in the source data; so add them
gu.addIndividualToGraph(
g, equiv_cell_line, None, cell_line_reagent_id)
gu.addSameIndividual(g, cell_line_id, equiv_cell_line)
# Cell line derives from patient
geno.addDerivesFrom(cell_line_id, patient_id)
geno.addDerivesFrom(cell_line_id, cell_type)
# Cell line a member of repository
gu.addMember(g, repository, cell_line_id)
if cat_remark != '':
gu.addDescription(g, cell_line_id, cat_remark)
# Cell age_at_sampling
# TODO add the age nodes when modeled properly in #78
# if (age != ''):
# this would give a BNode that is an instance of Age.
# but i don't know how to connect
# the age node to the cell line? we need to ask @mbrush
# age_id = '_'+re.sub('\s+','_',age)
# gu.addIndividualToGraph(
# g,age_id,age,self.terms['age'])
# gu.addTriple(
# g,age_id,self.properties['has_measurement'],age,
# True)
# ############# BUILD THE PATIENT #############
# Add the patient ID as an individual.
gu.addPerson(g, patient_id, patient_label)
# TODO map relationship to proband as a class
# (what ontology?)
# Add race of patient
# FIXME: Adjust for subcategories based on ethnicity field
# EDIT: There are 743 different entries for ethnicity...
# Too many to map?
# Add ethnicity as literal in addition to the mapped race?
# Adjust the ethnicity txt (if using)
# to initial capitalization to remove ALLCAPS
# TODO race should go into the individual's background
# and abstracted out to the Genotype class punting for now.
# if race != '':
示例7: OBAN
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
#.........这里部分代码省略.........
if s is None:
logging.error(
"Unable to retrieve graph node for Subject %s ", self.sub)
return
elif p is None:
logging.error(
"Unable to retrieve graph node for Predicate %s ", self.rel)
return
elif o is None:
logging.error(
"Unable to retrieve graph node for Object %s ", self.obj)
return
else:
g.add((s, p, o))
if self.assoc_id is None:
self.set_association_id()
node = self.gu.getNode(self.assoc_id)
g.add((node, RDF['type'],
self.gu.getNode(self.assoc_types['association'])))
self.gu.addTriple(g, self.assoc_id,
self.object_properties['has_subject'], self.sub)
self.gu.addTriple(g, self.assoc_id,
self.object_properties['has_object'], self.obj)
self.gu.addTriple(g, self.assoc_id,
self.object_properties['has_predicate'], self.rel)
if self.description is not None:
self.gu.addDescription(g, self.assoc_id, self.description)
if self.evidence is not None and len(self.evidence) > 0:
for e in self.evidence:
self.gu.addTriple(g, self.assoc_id,
self.object_properties['has_evidence'], e)
if self.source is not None and len(self.source) > 0:
for s in self.source:
if re.match('http', s):
# TODO assume that the source is a publication?
# use Reference class here
self.gu.addTriple(g, self.assoc_id,
self.object_properties['has_source'], s,
True)
else:
self.gu.addTriple(g, self.assoc_id,
self.object_properties['has_source'], s)
if self.provenance is not None and len(self.provenance) > 0:
for p in self.provenance:
self.gu.addTriple(g, self.assoc_id,
self.object_properties['has_provenance'], p)
if self.score is not None:
self.gu.addTriple(
g, self.assoc_id, self.properties['has_measurement'],
Literal(self.score, datatype=XSD['float']), True)
# TODO
# update with some kind of instance of scoring object
# that has a unit and type
return
示例8: _process_ortholog_classes
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
def _process_ortholog_classes(self, limit=None):
    """
    This method adds the KEGG orthology classes to the graph.

    Each row of the tab-delimited 'ortholog_classes' file holds an
    orthology class id and a name field that is split on ';' and ','.
    The first part is used as the class label. When there is more than
    one part, the remaining parts are added (stripped) as synonyms, the
    label itself is not repeated as a synonym, and the last part is also
    added as the description.

    If there's an embedded enzyme commission number
    in the description, that is added as an xref.

    Triples created:
    <orthology_class_id> is a class
    <orthology_class_id> has label <orthology_symbols>
    <orthology_class_id> has description <orthology_description>

    :param limit: when not None (and not in test mode), stop after the
        number of rows read exceeds this value
    :return: None
    """
    logger.info("Processing ortholog classes")
    g = self.testgraph if self.testMode else self.graph
    line_counter = 0
    gu = GraphUtils(curie_map.get())
    raw = '/'.join((self.rawdir, self.files['ortholog_classes']['file']))
    with open(raw, 'r', encoding="iso-8859-1") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            (orthology_class_id, orthology_class_name) = row

            if self.testMode and \
                    orthology_class_id not in \
                    self.test_ids['orthology_classes']:
                continue

            # The orthology class is essentially a KEGG gene ID
            # that is species agnostic.
            # Add the ID and label as a gene family class
            other_labels = re.split(r'[;,]', orthology_class_name)
            # the first one is the label we'll use
            orthology_label = other_labels[0]
            orthology_class_id = 'KEGG-'+orthology_class_id.strip()
            orthology_type = OrthologyAssoc.terms['gene_family']
            gu.addClassToGraph(g, orthology_class_id, orthology_label,
                               orthology_type)

            if len(other_labels) > 1:
                # add the rest as synonyms; the first entry is already
                # the class label, so it is skipped here
                for s in other_labels[1:]:
                    gu.addSynonym(g, orthology_class_id, s.strip())

                # add the last one as the description
                d = other_labels[-1]
                gu.addDescription(g, orthology_class_id, d)

                # add the enzyme commission number (EC:1.2.99.5)as an xref
                # sometimes there's two, like [EC:1.3.5.1 1.3.5.4]
                # can also have a dash, like EC:1.10.3.-
                # (re.findall always returns a list, possibly empty,
                # so no None check is needed)
                for ecm in re.findall(r'((?:\d+|\.|-){5,7})', d):
                    gu.addXref(g, orthology_class_id, 'EC:'+ecm)

            if not self.testMode and \
                    limit is not None and line_counter > limit:
                break
    logger.info("Done with ortholog classes")
    return
示例9: _process_data
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
#.........这里部分代码省略.........
# can change later if desired.
# since the genotype is reflective of the place
# it got made, should put that in to disambiguate
genotype_name = \
genotype_name+' ['+pheno_center_strain_label+']'
geno.addGenomicBackgroundToGenotype(
pheno_center_strain_id, genotype_id)
geno.addTaxon(pheno_center_strain_id, taxon_id)
# this is redundant, but i'll keep in in for now
geno.addSequenceDerivesFrom(genotype_id, colony_id)
genotype_name += '['+colony+']'
geno.addGenotype(genotype_id, genotype_name)
# Make the sex-qualified genotype,
# which is what the phenotype is associated with
sex_qualified_genotype_id = \
self.make_id(
(colony_id + phenotyping_center + zygosity +
strain_accession_id+sex))
sex_qualified_genotype_label = genotype_name+' ('+sex+')'
if sex == 'male':
sq_type_id = geno.genoparts['male_genotype']
elif sex == 'female':
sq_type_id = geno.genoparts['female_genotype']
else:
sq_type_id = geno.genoparts['sex_qualified_genotype']
geno.addGenotype(
sex_qualified_genotype_id,
sex_qualified_genotype_label, sq_type_id)
geno.addParts(
genotype_id, sex_qualified_genotype_id,
geno.object_properties['has_alternate_part'])
if genomic_background_id is not None and \
genomic_background_id != '':
# Add the taxon to the genomic_background_id
geno.addTaxon(taxon_id, genomic_background_id)
else:
# add it as the genomic background
geno.addTaxon(taxon_id, genotype_id)
# ############# BUILD THE G2P ASSOC #############
# from an old email dated July 23 2014:
# Phenotypes associations are made to
# imits colony_id+center+zygosity+gender
phenotype_id = mp_term_id
# it seems that sometimes phenotype ids are missing.
# indicate here
if phenotype_id is None or phenotype_id == '':
logger.warning(
"No phenotype id specified for row %d: %s",
line_counter, str(row))
continue
# experimental_phenotypic_evidence This was used in ZFIN
eco_id = "ECO:0000059"
# the association comes as a result of a g2p from
# a procedure in a pipeline at a center and parameter tested
assoc = G2PAssoc(self.name, sex_qualified_genotype_id,
phenotype_id)
assoc.add_evidence(eco_id)
# assoc.set_score(float(p_value))
# TODO add evidence instance using
# pipeline_stable_id +
# procedure_stable_id +
# parameter_stable_id
assoc.add_association_to_graph(g)
assoc_id = assoc.get_association_id()
# add a free-text description
description = \
' '.join((mp_term_name, 'phenotype determined by',
phenotyping_center, 'in an',
procedure_name, 'assay where',
parameter_name.strip(),
'was measured with an effect_size of',
str(round(float(effect_size), 5)),
'(p =', "{:.4e}".format(float(p_value)), ').'))
gu.addDescription(g, assoc_id, description)
# TODO add provenance information
# resource_id = resource_name
# assoc.addSource(g, assoc_id, resource_id)
if not self.testMode and \
limit is not None and line_counter > limit:
break
gu.loadProperties(g, G2PAssoc.object_properties, gu.OBJPROP)
gu.loadProperties(g, G2PAssoc.annotation_properties, gu.ANNOTPROP)
gu.loadProperties(g, G2PAssoc.datatype_properties, gu.DATAPROP)
return
示例10: OMIA
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
#.........这里部分代码省略.........
if descr == '':
descr = None
# omia label
omia_label = self.label_hash.get(omia_id)
# add the species-specific subclass (TODO please review this choice)
gb_species_id = row['gb_species_id']
if gb_species_id != '':
sp_phene_id = '-'.join((omia_id, gb_species_id))
else:
logger.error(
"No species supplied in species-specific phene table for %s",
omia_id)
return
species_id = 'NCBITaxon:'+str(gb_species_id)
# use this instead
species_label = self.label_hash.get('NCBITaxon:'+gb_species_id)
if sp_phene_label is None and \
omia_label is not None and species_label is not None:
sp_phene_label = ' '.join((omia_label, 'in', species_label))
self.gu.addClassToGraph(
self.g, sp_phene_id, sp_phene_label, omia_id, descr)
# add to internal hash store for later lookup
self.id_hash['phene'][row['phene_id']] = sp_phene_id
self.label_hash[sp_phene_id] = sp_phene_label
# add each of the following descriptions,
# if they are populated, with a tag at the end.
for item in [
'clin_feat', 'history', 'pathology', 'mol_gen', 'control']:
if row[item] is not None and row[item] != '':
self.gu.addDescription(
self.g, sp_phene_id, row[item] + ' ['+item+']')
# if row['symbol'] is not None: # species-specific
# CHECK ME - sometimes spaces or gene labels
# gu.addSynonym(g, sp_phene, row['symbol'])
self.gu.addOWLPropertyClassRestriction(
self.g, sp_phene_id, self.gu.object_properties['in_taxon'],
species_id)
# add inheritance as an association
inheritance_id = self._map_inheritance_term_id(row['inherit'])
if inheritance_id is not None:
assoc = DispositionAssoc(self.name, sp_phene_id, inheritance_id)
assoc.add_association_to_graph(self.g)
if row['characterised'] == 'Yes':
self.stored_omia_mol_gen[omia_id] = {
'mol_gen': row['mol_gen'],
'map_info': row['map_info'],
'species': row['gb_species_id']}
return
def write_molgen_report(self):
import csv
logger.info("Writing G2P report for OMIA")
f = '/'.join((self.outdir, 'omia_molgen_report.txt'))
with open(f, 'w', newline='\n') as csvfile:
writer = csv.writer(csvfile, delimiter='\t')
# write header
h = ['omia_id', 'molecular_description', 'mapping_info', 'species']
示例11: process_gaf
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
def process_gaf(self, file, limit, id_map=None):
if self.testMode:
g = self.testgraph
else:
g = self.graph
gu = GraphUtils(curie_map.get())
geno = Genotype(g)
logger.info("Processing Gene Associations from %s", file)
line_counter = 0
zfin = wbase = None
if 7955 in self.tax_ids:
zfin = ZFIN()
elif 6239 in self.tax_ids:
wbase = WormBase()
with gzip.open(file, 'rb') as csvfile:
filereader = csv.reader(io.TextIOWrapper(csvfile, newline=""),
delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
# comments start with exclamation
if re.match(r'!', ''.join(row)):
continue
(db, gene_num, gene_symbol, qualifier, go_id, ref, eco_symbol,
with_or_from, aspect, gene_name, gene_synonym, object_type,
taxon, date, assigned_by, annotation_extension,
gene_product_form_id) = row
# test for required fields
if (db == '' or gene_num == '' or gene_symbol == '' or
go_id == '' or ref == '' or eco_symbol == '' or
aspect == '' or object_type == '' or taxon == '' or
date == '' or assigned_by == ''):
logger.error(
"Missing required part of annotation " +
"on row %d:\n"+'\t'.join(row),
line_counter)
continue
# deal with qualifier NOT, contributes_to, colocalizes_with
if re.search(r'NOT', qualifier):
continue
db = self.clean_db_prefix(db)
uniprotid = None
gene_id = None
if db == 'UniProtKB':
mapped_ids = id_map.get(gene_num)
if id_map is not None and mapped_ids is not None:
if len(mapped_ids) == 1:
gene_id = mapped_ids[0]
uniprotid = ':'.join((db, gene_num))
gene_num = re.sub(r'\w+\:', '', gene_id)
elif len(mapped_ids) > 1:
# logger.warning(
# "Skipping gene id mapped for >1 gene %s -> %s",
# gene_num, str(mapped_ids))
continue
else:
continue
elif db == 'MGI':
gene_num = re.sub(r'MGI:', '', gene_num)
gene_id = ':'.join((db, gene_num))
gene_id = re.sub(r'MGI\:MGI\:', 'MGI:', gene_id)
else:
gene_id = ':'.join((db, gene_num))
if self.testMode \
and not(
re.match(r'NCBIGene', gene_id) and
int(gene_num) in self.test_ids):
continue
gu.addClassToGraph(g, gene_id, gene_symbol)
if gene_name != '':
gu.addDescription(g, gene_id, gene_name)
if gene_synonym != '':
for s in re.split(r'\|', gene_synonym):
gu.addSynonym(g, gene_id, s.strip())
if re.search(r'\|', taxon):
# TODO add annotations with >1 taxon
logger.info(">1 taxon (%s) on line %d. skipping", taxon,
line_counter)
else:
tax_id = re.sub(r'taxon:', 'NCBITaxon:', taxon)
geno.addTaxon(tax_id, gene_id)
assoc = Assoc(self.name)
assoc.set_subject(gene_id)
assoc.set_object(go_id)
eco_id = self.map_go_evidence_code_to_eco(eco_symbol)
if eco_id is not None:
assoc.add_evidence(eco_id)
refs = re.split(r'\|', ref)
#.........这里部分代码省略.........
示例12: __init__
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import addDescription [as 别名]
class Dataset:
"""
this will produce the metadata about a dataset
following the example laid out here:
http://htmlpreview.github.io/?
https://github.com/joejimbo/HCLSDatasetDescriptions/blob/master/Overview.html#appendix_1
(mind the wrap)
"""
namespaces = {
'dctypes': 'http://purl.org/dc/dcmitype/',
'pav': 'http://purl.org/pav/',
'dcat': 'http://www.w3.org/ns/dcat#'
}
core_bindings = {'rdf': RDF, 'foaf': FOAF, 'xsd': XSD, 'dct': DCTERMS}
def __init__(self, identifier, title, url, description=None,
             license_url=None, data_rights=None):
    """
    Create the dataset's own graph and seed it with its core metadata.

    The dataset node is typed as a dctypes Dataset and given a title,
    an identifier literal and a foaf page. License, rights and
    description are only added when they are supplied.

    :param identifier: local identifier; ':' is prepended
        to build the dataset node
    :param title: value for the dct:title literal
    :param url: page describing the dataset (added as foaf:page)
    :param description: optional free-text description
    :param license_url: optional license, added as a dct:license URI
    :param data_rights: optional rights text,
        added as a dct:rights literal
    """
    dctypes_ns = Namespace(self.namespaces['dctypes'])
    self.gu = GraphUtils(curie_map.get())
    self.identifier = URIRef(':'+identifier)

    self.version = None
    self.date_issued = None
    self.date_accessed = None
    self.citation = set()
    # the access date must be set before it is used
    # for dipperized_version below
    self.set_access_date()
    self.license = license_url

    self.graph = Graph()
    self.load_bindings()

    dataset_node = self.identifier
    self.graph.add((dataset_node, RDF['type'], dctypes_ns['Dataset']))
    self.graph.add((dataset_node, DCTERMS['title'], Literal(title)))
    self.graph.add(
        (dataset_node, DCTERMS['identifier'], Literal(identifier)))
    self.graph.add((dataset_node, FOAF['page'], URIRef(url)))

    self.dipperized_version = URIRef('monarch'+str(self.date_accessed))

    # maybe in the future add the logo here:
    # schemaorg:logo <http://www.ebi.ac.uk/rdf/sites/ebi.ac.uk.rdf/files/resize/images/rdf/chembl_service_logo-146x48.gif> .

    # TODO add the licence info
    # FIXME:Temporarily making this in IF statement,
    # can revert after all current resources are updated.
    if license_url is None:
        logger.debug('No license provided.')
    else:
        self.graph.add(
            (dataset_node, DCTERMS['license'], URIRef(license_url)))

    if data_rights is None:
        logger.debug('No rights provided.')
    else:
        self.graph.add(
            (dataset_node, DCTERMS['rights'], Literal(data_rights)))

    if description is not None:
        self.gu.addDescription(self.graph, dataset_node, description)
    return
def load_bindings(self):
    """
    Bind the namespace prefixes used by this dataset to its graph.

    The entries of core_bindings are bound as they are; the entries of
    namespaces are wrapped in a Namespace before being bound.

    :return: None
    """
    for prefix, namespace in self.core_bindings.items():
        self.graph.bind(prefix, namespace)
    for prefix, uri in self.namespaces.items():
        self.graph.bind(prefix, Namespace(uri))
    return
def setVersion(self, date_issued, version_id=None):
    """
    Legacy function... should use the other set_* for version and date
    # TODO set as deprecated

    Records the issue date when one is given, then sets the version
    from version_id if it is given, otherwise from date_issued.
    When neither argument is given, an error is logged and nothing
    is set.

    :param date_issued: issue date of the dataset, or None
    :param version_id: explicit version; takes precedence over
        date_issued for the version when not None
    :return: None
    """
    if date_issued is None and version_id is None:
        logger.error("No date or version set!")
        # TODO throw error
        return

    if date_issued is not None:
        self.set_date_issued(date_issued)

    # the version is set exactly once on every path; a version_id
    # without a date "shouldn't happen" but is handled the same way
    if version_id is not None:
        self.set_version_by_num(version_id)
    else:
        self.set_version_by_date(date_issued)

    logger.info("set version to %s", self.version)
    return
def set_date_issued(self, date_issued):
#.........这里部分代码省略.........