本文整理汇总了 Python 中 dipper.utils.GraphUtils.GraphUtils.loadObjectProperties 方法的典型用法代码示例。如果您正苦于以下问题：Python GraphUtils.loadObjectProperties 方法的具体用法？Python GraphUtils.loadObjectProperties 怎么用？Python GraphUtils.loadObjectProperties 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dipper.utils.GraphUtils.GraphUtils 的用法示例。
在下文中一共展示了GraphUtils.loadObjectProperties方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
def parse(self, limit=None):
    """
    Parse the raw source files and load the results into the graph.

    If ``self.version_num`` is not set, the raw directory is probed for
    the "letter.WS###" file and the WS number found in its name is handed
    to ``update_wsnum_in_files`` before any file is processed.
    The per-run lookup tables are reset, each ``process_*`` step is run
    with the same row limit, and finally the property definitions are
    loaded into the selected graph.

    :param limit: maximum number of rows to parse from each file;
        ``None`` means no limit
    :return: None
    :raises FileNotFoundError: if the version number must be probed but
        no file starting with "letter" exists in ``self.rawdir``
    :raises ValueError: if the letter file name carries no WS number
    """
    if limit is not None:
        logger.info("Only parsing first %s rows of each file", limit)

    if self.version_num is None:
        import os
        logger.info("Figuring out version num for files")
        # probe the raw directory for the WSnumber on
        # the "letter.WS###" file.
        # this is the only one that we keep the version number on
        letter_file = next(
            (f for f in os.listdir(self.rawdir) if f.startswith('letter')),
            None)
        if letter_file is None:
            # fail with a clear message instead of a bare StopIteration
            raise FileNotFoundError(
                "No 'letter' file found in %s; "
                "cannot determine the WS version number" % self.rawdir)
        vernum = re.search(r'(WS\d+)', letter_file)
        if vernum is None:
            # fail with a clear message instead of an AttributeError on None
            raise ValueError(
                "No WS version number found in file name '%s'"
                % letter_file)
        self.update_wsnum_in_files(vernum.group(1))

    logger.info("Parsing files...")

    if self.testOnly:
        self.testMode = True

    # property definitions are loaded into whichever graph is active
    g = self.testgraph if self.testMode else self.graph

    # NOTE(review): assumed to apply in both test and full mode -- confirm
    self.nobnodes = True  # FIXME

    # to hold any label for a given id
    self.id_label_map = {}
    # to hold the mappings between genotype and background
    self.genotype_backgrounds = {}
    self.extrinsic_id_to_enviro_id_hash = {}
    # to hold the genes variant due to a seq alt
    self.variant_loci_genes = {}
    # to hold the parts of an environment
    self.environment_hash = {}
    self.wildtype_genotypes = []
    # stores the rnai_reagent to gene targets
    self.rnai_gene_map = {}

    self.process_gene_ids(limit)
    # self.process_gene_desc(limit)  # TEC input file is mia 2016-Mar-03
    self.process_allele_phenotype(limit)
    self.process_rnai_phenotypes(limit)
    self.process_pub_xrefs(limit)
    self.process_feature_loc(limit)
    self.process_disease_association(limit)
    # TODO add this when complete
    # self.process_gene_interaction(limit)

    logger.info("Finished parsing.")

    self.load_bindings()
    gu = GraphUtils(curie_map.get())
    gu.loadAllProperties(g)
    gu.loadObjectProperties(g, Genotype.object_properties)

    logger.info("Found %d nodes in graph", len(self.graph))
    logger.info("Found %d nodes in testgraph", len(self.testgraph))
示例2: parse
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
def parse(self, limit=None):
    """
    Process every selected annotation file and load property definitions.

    Each key of ``self.files`` other than 'go-references' and 'id-map'
    is treated as a taxon number. Outside of test mode, keys whose integer
    value is not in ``self.tax_ids`` are skipped. Every remaining file is
    passed to ``process_gaf`` together with the uniprot-to-gene id map.

    :param limit: maximum number of rows to parse from each file;
        ``None`` means no limit
    :return: None
    """
    if limit is not None:
        logger.info("Only parsing first %s rows of each file", limit)
    logger.info("Parsing files...")

    if self.testOnly:
        self.testMode = True

    # property definitions are loaded into whichever graph is active
    target_graph = self.testgraph if self.testMode else self.graph

    # NOTE(review): assumed to apply in both test and full mode -- confirm
    self.nobnodes = True  # FIXME

    # build the id map for mapping uniprot ids to genes
    id_map = self.get_uniprot_entrez_id_map()

    for key in self.files:
        # these two entries are not per-taxon annotation files, and
        # outside of test mode only the configured taxa are processed
        if key in ['go-references', 'id-map'] or (
                not self.testMode and int(key) not in self.tax_ids):
            continue
        gaf_path = '/'.join((self.rawdir, self.files.get(key)['file']))
        self.process_gaf(gaf_path, limit, id_map)

    logger.info("Finished parsing.")

    self.load_bindings()
    graph_utils = GraphUtils(curie_map.get())
    graph_utils.loadAllProperties(target_graph)
    graph_utils.loadObjectProperties(target_graph, Genotype.object_properties)

    logger.info("Found %d nodes in graph", len(self.graph))
    logger.info("Found %d nodes in testgraph", len(self.testgraph))
示例3: Monochrom
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
#.........这里部分代码省略.........
self.load_core_bindings()
self.load_bindings()
# using the full graph as the test here
self.testgraph = self.graph
logger.info("Found %d nodes", len(self.graph))
logger.info("Done parsing files.")
return
def _get_chrbands(self, limit, taxon):
"""
For the given taxon, it will fetch the chr band file.
We will not deal with the coordinate information with this parser.
Here, we only are concerned with building the partonomy.
:param limit:
:return:
"""
line_counter = 0
myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
logger.info("Processing Chr bands from FILE: %s", myfile)
geno = Genotype(self.graph)
# build the organism's genome from the taxon
genome_label = self.files[taxon]['genome_label']
taxon_id = 'NCBITaxon:'+taxon
# add the taxon as a class. adding the class label elsewhere
self.gu.addClassToGraph(self.graph, taxon_id, None)
self.gu.addSynonym(self.graph, taxon_id, genome_label)
self.gu.loadObjectProperties(self.graph, Feature.object_properties)
genome_id = geno.makeGenomeID(taxon_id)
geno.addGenome(taxon_id, genome_label)
self.gu.addOWLPropertyClassRestriction(
self.graph, genome_id, Genotype.object_properties['in_taxon'],
taxon_id)
with gzip.open(myfile, 'rb') as f:
for line in f:
# skip comments
line = line.decode().strip()
if re.match(r'^#', line):
continue
# chr13 4500000 10000000 p12 stalk
(chrom, start, stop, band, rtype) = line.split('\t')
line_counter += 1
# NOTE
# some less-finished genomes have placed and unplaced scaffolds
# * Placed scaffolds:
# Scaffold has an oriented location within a chromosome.
# * Unlocalized scaffolds:
# scaffold 's chromosome is known,
# scaffold's position, orientation or both is not known.
# *Unplaced scaffolds:
# it is not known which chromosome the scaffold belongs to.
# find out if the thing is a full on chromosome, or a scaffold:
# ex: unlocalized scaffold: chr10_KL568008v1_random
# ex: unplaced scaffold: chrUn_AABR07022428v1
placed_scaffold_pattern = r'chr(\d+|X|Y|Z|W|MT|M)'
示例4: _process_data
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
def _process_data(self, raw, limit=None):
logger.info("Processing Data from %s", raw)
gu = GraphUtils(curie_map.get())
if self.testMode:
g = self.testgraph
else:
g = self.graph
geno = Genotype(g)
line_counter = 0
gu.loadAllProperties(g)
gu.loadObjectProperties(g, geno.object_properties)
# Add the taxon as a class
taxon_id = 'NCBITaxon:10090' # map to Mus musculus
gu.addClassToGraph(g, taxon_id, None)
# with open(raw, 'r', encoding="utf8") as csvfile:
with gzip.open(raw, 'rt') as csvfile:
filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
next(filereader, None) # skip the header row
for row in filereader:
line_counter += 1
(marker_accession_id, marker_symbol, phenotyping_center,
colony, sex, zygosity, allele_accession_id, allele_symbol,
allele_name, strain_accession_id, strain_name, project_name,
project_fullname, pipeline_name, pipeline_stable_id,
procedure_stable_id, procedure_name, parameter_stable_id,
parameter_name, top_level_mp_term_id, top_level_mp_term_name,
mp_term_id, mp_term_name, p_value, percentage_change,
effect_size, statistical_method, resource_name) = row
if self.testMode and marker_accession_id not in self.test_ids:
continue
# ##### cleanup some of the identifiers ######
zygosity_id = self._map_zygosity(zygosity)
# colony ids sometimes have <> in them, spaces,
# or other non-alphanumerics and break our system;
# replace these with underscores
colony_id = '_'+re.sub(r'\W+', '_', colony)
if self.nobnodes:
colony_id = ':'+colony_id
if not re.match(r'MGI', allele_accession_id):
allele_accession_id = \
'_IMPC-'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
allele_accession_id = ':'+allele_accession_id
if re.search(r'EUROCURATE', strain_accession_id):
# the eurocurate links don't resolve at IMPC
strain_accession_id = '_'+strain_accession_id
if self.nobnodes:
strain_accession_id = ':'+strain_accession_id
elif not re.match(r'MGI', strain_accession_id):
logger.info(
"Found a strange strain accession...%s",
strain_accession_id)
strain_accession_id = 'IMPC:'+strain_accession_id
######################
# first, add the marker and variant to the graph as with MGI,
# the allele is the variant locus. IF the marker is not known,
# we will call it a sequence alteration. otherwise,
# we will create a BNode for the sequence alteration.
sequence_alteration_id = variant_locus_id = None
variant_locus_name = sequence_alteration_name = None
# extract out what's within the <> to get the symbol
if re.match(r'.*<.*>', allele_symbol):
sequence_alteration_name = \
re.match(r'.*<(.*)>', allele_symbol).group(1)
else:
sequence_alteration_name = allele_symbol
if marker_accession_id is not None and \
marker_accession_id == '':
logger.warning(
"Marker unspecified on row %d", line_counter)
marker_accession_id = None
if marker_accession_id is not None:
variant_locus_id = allele_accession_id
variant_locus_name = allele_symbol
variant_locus_type = geno.genoparts['variant_locus']
geno.addGene(marker_accession_id, marker_symbol,
geno.genoparts['gene'])
geno.addAllele(variant_locus_id, variant_locus_name,
variant_locus_type, None)
geno.addAlleleOfGene(variant_locus_id, marker_accession_id)
sequence_alteration_id = \
'_seqalt'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
sequence_alteration_id = ':'+sequence_alteration_id
geno.addSequenceAlterationToVariantLocus(
sequence_alteration_id, variant_locus_id)
#.........这里部分代码省略.........
示例5: UCSCBands
# 需要导入模块: from dipper.utils.GraphUtils import GraphUtils [as 别名]
# 或者: from dipper.utils.GraphUtils.GraphUtils import loadObjectProperties [as 别名]
#.........这里部分代码省略.........
# using the full graph as the test here
self.testgraph = self.graph
logger.info("Found %d nodes", len(self.graph))
logger.info("Done parsing files.")
return
def _get_chrbands(self, limit, taxon):
"""
:param limit:
:return:
"""
# TODO PYLINT figure out what limit was for and why it is unused
line_counter = 0
myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
logger.info("Processing Chr bands from FILE: %s", myfile)
geno = Genotype(self.graph)
monochrom = Monochrom()
# used to hold band definitions for a chr
# in order to compute extent of encompasing bands
mybands = {}
# build the organism's genome from the taxon
genome_label = self.files[taxon]['genome_label']
taxon_id = 'NCBITaxon:'+taxon
# add the taxon as a class. adding the class label elsewhere
self.gu.addClassToGraph(self.graph, taxon_id, None)
self.gu.addSynonym(self.graph, taxon_id, genome_label)
self.gu.loadObjectProperties(self.graph, Feature.object_properties)
self.gu.loadProperties(self.graph, Feature.data_properties,
self.gu.DATAPROP)
self.gu.loadAllProperties(self.graph)
geno.addGenome(taxon_id, genome_label)
# add the build and the taxon it's in
build_num = self.files[taxon]['build_num']
build_id = 'UCSC:'+build_num
geno.addReferenceGenome(build_id, build_num, taxon_id)
# process the bands
with gzip.open(myfile, 'rb') as f:
for line in f:
# skip comments
line = line.decode().strip()
if re.match('^#', line):
continue
# chr13 4500000 10000000 p12 stalk
(scaffold, start, stop, band_num, rtype) = line.split('\t')
line_counter += 1
# NOTE some less-finished genomes have
# placed and unplaced scaffolds
# * Placed scaffolds:
# the scaffolds have been placed within a chromosome.
# * Unlocalized scaffolds:
# although the chromosome within which the scaffold occurs
# is known, the scaffold's position or orientation
# is not known.
# * Unplaced scaffolds: