本文整理汇总了Python中dipper.models.Genotype.Genotype.makeGenomeID方法的典型用法代码示例。如果您正苦于以下问题:Python Genotype.makeGenomeID方法的具体用法?Python Genotype.makeGenomeID怎么用?Python Genotype.makeGenomeID使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Genotype.Genotype的用法示例。
在下文中一共展示了Genotype.makeGenomeID方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_QTLs_genomic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import makeGenomeID [as 别名]
def _process_QTLs_genomic_location(self, raw, taxon_id, build_id, build_label, limit=None):
"""
Parse a gzipped, tab-delimited QTL location file and add each QTL to the graph.

Every non-comment row is unpacked into nine columns (chromosome, source,
type, start, stop, frame, strand, score, attributes). The attribute column
is split into a key/value dictionary; rows whose attributes cannot be split
into key=value items are logged and skipped.

Graph additions made in the visible part of this method:
  * the QTL ('AQTL:<QTL_ID>') as an individual of type geno.genoparts['QTL'],
    with its taxon
  * a Reference for the PUBMED_ID attribute, when present
  * a G2PAssoc between the QTL and its trait ('AQTLTrait:<trait_ID>'),
    with evidence, source and (when a P-value is given) a score
  * a chromosome instance for the given build

NOTE: this excerpt is truncated and has lost its indentation, so the
handling of the genomic coordinates and the exact block nesting are not
visible here.

:param raw: path of the gzipped input file (read as ISO-8859-1 text)
:param taxon_id: taxon identifier used for the genome, the QTL and the chromosome id
:param build_id: genome build identifier used for the chromosome instance
:param build_label: label passed along with build_id to addChromosomeInstance
:param limit: not referenced in the portion of the method shown here
:return: not visible in this truncated excerpt
"""
# write into the test graph when running in test mode, else the main graph
if self.testMode:
g = self.testgraph
else:
g = self.graph
gu = GraphUtils(curie_map.get())
line_counter = 0
geno = Genotype(g)
# genome_id is not used again in the visible part of this method
genome_id = geno.makeGenomeID(taxon_id) # assume that chrs get added to the genome elsewhere
eco_id = "ECO:0000061" # Quantitative Trait Analysis Evidence
with gzip.open(raw, 'rt', encoding='ISO-8859-1') as tsvfile:
reader = csv.reader(tsvfile, delimiter="\t")
for row in reader:
line_counter += 1
# skip comment rows (first field starts with '#')
if re.match('^#', ' '.join(row)):
continue
# NOTE(review): a row without exactly nine fields raises ValueError here
(chromosome, qtl_source, qtl_type, start_bp, stop_bp, frame, strand, score, attr) = row
# Chr.Z Animal QTLdb Production_QTL 33954873 34023581 . . .
# QTL_ID=2242;Name="Spleen percentage";Abbrev="SPLP";PUBMED_ID=17012160;trait_ID=2234;
# trait="Spleen percentage";breed="leghorn";"FlankMarkers=ADL0022";VTO_name="spleen mass";
# CMO_name="spleen weight to body weight ratio";Map_Type="Linkage";Model="Mendelian";
# Test_Base="Chromosome-wise";Significance="Significant";P-value="<0.05";F-Stat="5.52";
# Variance="2.94";Dominance_Effect="-0.002";Additive_Effect="0.01"
# make dictionary of attributes
# keys are:
# QTL_ID,Name,Abbrev,PUBMED_ID,trait_ID,trait,
# FlankMarkers,VTO_name,Map_Type,Significance,P-value,Model,Test_Base,Variance,
# Bayes-value,PTO_name,gene_IDsrc,peak_cM,CMO_name,gene_ID,F-Stat,LOD-score,Additive_Effect,
# Dominance_Effect,Likelihood_Ratio,LS-means,Breed,
# trait (duplicate with Name),Variance,Bayes-value,
# F-Stat,LOD-score,Additive_Effect,Dominance_Effect,Likelihood_Ratio,LS-means
# deal with poorly formed attributes
# a bare '"FlankMarkers";' item has no '=' and would otherwise be
# rejected by the check below, so it is removed first
if re.search('"FlankMarkers";', attr):
attr = re.sub('"FlankMarkers";', '', attr)
# strip all double quotes, then require every ';'-separated item to contain '='
# NOTE(review): a trailing ';' produces an empty item, which also marks the row as bad
attr_items = re.sub('"', '', attr).split(";")
bad_attr_flag = False
for a in attr_items:
if not re.search('=', a):
bad_attr_flag = True
if bad_attr_flag:
logger.error("Poorly formed data on line %d:\n %s", line_counter, '\t'.join(row))
continue
# NOTE(review): an item containing more than one '=' splits into three or
# more parts and makes dict() raise ValueError
attribute_dict = dict(item.split("=") for item in re.sub('"', '', attr).split(";"))
qtl_num = attribute_dict.get('QTL_ID')
# in test mode, keep only QTLs whose numeric id is listed in self.test_ids
if self.testMode and int(qtl_num) not in self.test_ids:
continue
# make association between QTL and trait
qtl_id = 'AQTL:' + str(qtl_num)
gu.addIndividualToGraph(g, qtl_id, None, geno.genoparts['QTL'])
geno.addTaxon(taxon_id, qtl_id)
# NOTE(review): raises TypeError when the row has no trait_ID attribute
trait_id = 'AQTLTrait:'+attribute_dict.get('trait_ID')
# if pub is in attributes, add it to the association
pub_id = None
if 'PUBMED_ID' in attribute_dict.keys():
pub_id = attribute_dict.get('PUBMED_ID')
# ids starting with 'ISU' get the AQTLPub prefix; everything else is
# treated as a PubMed id and typed as a journal article
if re.match('ISU.*', pub_id):
pub_id = 'AQTLPub:' + pub_id.strip()
p = Reference(pub_id)
else:
pub_id = 'PMID:' + pub_id.strip()
p = Reference(pub_id, Reference.ref_types['journal_article'])
p.addRefToGraph(g)
# Add QTL to graph
assoc = G2PAssoc(self.name, qtl_id, trait_id, gu.object_properties['is_marker_for'])
assoc.add_evidence(eco_id)
assoc.add_source(pub_id)
if 'P-value' in attribute_dict.keys():
# P-values may carry a leading '<' (e.g. "<0.05"); strip it before converting.
# This rebinds `score`, replacing the score column unpacked from the row above.
score = float(re.sub('<', '', attribute_dict.get('P-value')))
assoc.set_score(score)
assoc.add_association_to_graph(g)
# TODO make association to breed (which means making QTL feature in Breed background)
# get location of QTL
# remove the 'Chr.' prefix from the chromosome name
# NOTE(review): '\.' in a non-raw string literal is an invalid escape
# sequence in newer Python versions; consider r'Chr\.'
chromosome = re.sub('Chr\.', '', chromosome)
chrom_id = makeChromID(chromosome, taxon_id, 'CHR')
# chrom_in_build_id is not used in the visible part of this method
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
qtl_feature = Feature(qtl_id, None, geno.genoparts['QTL'])
if start_bp == '':
#......... part of the code is omitted here .........
示例2: _get_chrbands
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import makeGenomeID [as 别名]
def _get_chrbands(self, limit, taxon):
"""
For the given taxon, read its gzipped chr band file from self.rawdir.
We will not deal with the coordinate information with this parser.
Here, we only are concerned with building the partonomy:
the genome, its chromosome classes and their band classes.

NOTE: this excerpt is truncated and has lost its indentation, so the
exact block nesting and the end of the method are not visible here.

:param limit: not referenced in the portion of the method shown here
:param taxon: key into self.files; also appended to 'NCBITaxon:' to
    form the taxon id
:return: not visible in this truncated excerpt
"""
line_counter = 0
myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
logger.info("Processing Chr bands from FILE: %s", myfile)
geno = Genotype(self.graph)
# build the organism's genome from the taxon
genome_label = self.files[taxon]['genome_label']
taxon_id = 'NCBITaxon:'+taxon
# add the taxon as a class. adding the class label elsewhere
self.gu.addClassToGraph(self.graph, taxon_id, None)
self.gu.addSynonym(self.graph, taxon_id, genome_label)
self.gu.loadObjectProperties(self.graph, Feature.object_properties)
genome_id = geno.makeGenomeID(taxon_id)
geno.addGenome(taxon_id, genome_label)
# restrict the genome class to the taxon via the in_taxon property
self.gu.addOWLPropertyClassRestriction(
self.graph, genome_id, Genotype.object_properties['in_taxon'],
taxon_id)
# the file is opened in binary mode, so each line is decoded below
with gzip.open(myfile, 'rb') as f:
for line in f:
# skip comments
line = line.decode().strip()
if re.match(r'^#', line):
continue
# chr13 4500000 10000000 p12 stalk
# NOTE(review): a line without exactly five tab-separated fields
# raises ValueError here
(chrom, start, stop, band, rtype) = line.split('\t')
line_counter += 1
# NOTE
# some less-finished genomes have placed and unplaced scaffolds
# * Placed scaffolds:
# Scaffold has an oriented location within a chromosome.
# * Unlocalized scaffolds:
# scaffold 's chromosome is known,
# scaffold's position, orientation or both is not known.
# *Unplaced scaffolds:
# it is not known which chromosome the scaffold belongs to.
# find out if the thing is a full on chromosome, or a scaffold:
# ex: unlocalized scaffold: chr10_KL568008v1_random
# ex: unplaced scaffold: chrUn_AABR07022428v1
# NOTE(review): this pattern is loop-invariant but is rebuilt for every line
placed_scaffold_pattern = r'chr(\d+|X|Y|Z|W|MT|M)'
# TODO unused
# unlocalized_scaffold_pattern = \
# placed_scaffold_pattern + r'_(\w+)_random'
# unplaced_scaffold_pattern = r'chrUn_(\w+)'
# the appended '$' makes the whole name match, so suffixed scaffold
# names such as 'chr10_..._random' are rejected
m = re.match(placed_scaffold_pattern+r'$', chrom)
if m is not None and len(m.groups()) == 1:
# the chromosome is the first match of the pattern
# ch = m.group(1) # TODO unused
pass
else:
# let's skip over anything that isn't a placed_scaffold
# at the class level
logger.info("Skipping non-placed chromosome %s", chrom)
continue
# the chrom class, taxon as the reference
cclassid = makeChromID(chrom, taxon, 'CHR')
# add the chromosome as a class
geno.addChromosomeClass(chrom, taxon_id, genome_label)
self.gu.addOWLPropertyClassRestriction(
self.graph, cclassid,
self.gu.object_properties['member_of'], genome_id)
# add the band(region) as a class
# (with an empty band this id is just the chromosome class id)
maplocclass_id = cclassid+band
maplocclass_label = makeChromLabel(chrom+band, genome_label)
# band comes from str.split, so it is never None; only the
# emptiness check is effective here
if band is not None and band.strip() != '':
region_type_id = self.map_type_of_region(rtype)
self.gu.addClassToGraph(
self.graph, maplocclass_id, maplocclass_label,
region_type_id)
else:
region_type_id = Feature.types['chromosome']
# add the staining intensity of the band
# only region types starting with gneg/gpos/gvar are considered, and
# only for bands and sub-bands
if re.match(r'g(neg|pos|var)', rtype):
if region_type_id in [
Feature.types['chromosome_band'],
Feature.types['chromosome_subband']]:
stain_type = Feature.types.get(rtype)
if stain_type is not None:
self.gu.addOWLPropertyClassRestriction(
self.graph, maplocclass_id,
Feature.properties['has_staining_intensity'],
#......... part of the code is omitted here .........
示例3: _get_chrbands
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import makeGenomeID [as 别名]
def _get_chrbands(self, limit, taxon):
"""
For the given taxon, read its gzipped chr band file from self.rawdir.
We will not deal with the coordinate information with this parser.
Here, we only are concerned with building the partonomy:
the genome, its chromosome classes and their band classes.

NOTE: this excerpt is truncated and has lost its indentation, so the
exact block nesting and the end of the method are not visible here.

:param limit: not referenced in the portion of the method shown here
:param taxon: key into self.files; also appended to 'NCBITaxon:' to
    form the taxon id
:return: not visible in this truncated excerpt
"""
model = Model(self.graph)
line_counter = 0
myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
LOG.info("Processing Chr bands from FILE: %s", myfile)
geno = Genotype(self.graph)
# build the organism's genome from the taxon
genome_label = self.files[taxon]['genome_label']
taxon_id = 'NCBITaxon:' + taxon
# add the taxon as a class. adding the class label elsewhere
model.addClassToGraph(taxon_id, None)
model.addSynonym(taxon_id, genome_label)
genome_id = geno.makeGenomeID(taxon_id)
geno.addGenome(taxon_id, genome_label)
# restrict the genome class to the taxon via the 'in taxon' term
model.addOWLPropertyClassRestriction(
genome_id, self.globaltt['in taxon'], taxon_id)
placed_scaffold_pattern = r'chr(\d+|X|Y|Z|W|MT|M)'
# currently unused patterns
# unlocalized_scaffold_pattern = placed_scaffold_pattern + r'_(\w+)_random'
# unplaced_scaffold_pattern = r'chrUn_(\w+)'
# column names in file order; used below to look up fields by name
col = ['chrom', 'start', 'stop', 'band', 'rtype']
# the file is opened in binary mode, so each line is decoded below
with gzip.open(myfile, 'rb') as reader:
for line in reader:
line_counter += 1
# skip comments
line = line.decode().strip()
# NOTE(review): a blank line becomes '' after strip(), so line[0]
# raises IndexError
if line[0] == '#':
continue
# chr13 4500000 10000000 p12 stalk
row = line.split('\t')
chrom = row[col.index('chrom')]
band = row[col.index('band')]
rtype = row[col.index('rtype')]
# NOTE
# some less-finished genomes have placed and unplaced scaffolds
# * Placed scaffolds:
# Scaffold has an oriented location within a chromosome.
# * Unlocalized scaffolds:
# scaffold 's chromosome is known,
# scaffold's position, orientation or both is not known.
# *Unplaced scaffolds:
# it is not known which chromosome the scaffold belongs to.
# find out if the thing is a full on chromosome, or a scaffold:
# ex: unlocalized scaffold: chr10_KL568008v1_random
# ex: unplaced scaffold: chrUn_AABR07022428v1
# the appended '$' makes the whole name match, so suffixed scaffold
# names such as 'chr10_..._random' are rejected
mch = re.match(placed_scaffold_pattern+r'$', chrom)
if mch is not None and len(mch.groups()) == 1:
# the chromosome is the first match of the pattern
# chrom = m.group(1) # TODO unused
pass
else:
# let's skip over anything that isn't a placed_scaffold
LOG.info("Skipping non-placed chromosome %s", chrom)
continue
# the chrom class, taxon as the reference
cclassid = makeChromID(chrom, taxon, 'CHR')
# add the chromosome as a class
geno.addChromosomeClass(chrom, taxon_id, genome_label)
model.addOWLPropertyClassRestriction(
cclassid, self.globaltt['member of'], genome_id)
# add the band(region) as a class
# (with an empty band this id is just the chromosome class id)
maplocclass_id = cclassid+band
maplocclass_label = makeChromLabel(chrom+band, genome_label)
# band comes from str.split, so it is never None; only the
# emptiness check is effective here
if band is not None and band.strip() != '':
region_type_id = self.map_type_of_region(rtype)
model.addClassToGraph(
maplocclass_id, maplocclass_label,
region_type_id)
else:
region_type_id = self.globaltt['chromosome']
# add the staining intensity of the band
# only region types starting with gneg/gpos/gvar are considered, and
# only for bands and sub-bands
if re.match(r'g(neg|pos|var)', rtype):
if region_type_id in [
self.globaltt['chromosome_band'],
self.globaltt['chromosome_subband']]:
stain_type = self.resolve(rtype)
if stain_type is not None:
# NOTE(review): self.resolve(rtype) is called a second time here
# instead of reusing stain_type
model.addOWLPropertyClassRestriction(
maplocclass_id,
self.globaltt['has_sequence_attribute'],
self.resolve(rtype))
else:
# usually happens if it's a chromosome because
# they don't actually have banding info
#......... part of the code is omitted here .........