本文整理汇总了Python中dipper.models.Genotype.Genotype.addParts方法的典型用法代码示例。如果您正苦于以下问题:Python Genotype.addParts方法的具体用法?Python Genotype.addParts怎么用?Python Genotype.addParts使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Genotype.Genotype的用法示例。
在下文中一共展示了Genotype.addParts方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addParts [as 别名]
#.........这里部分代码省略.........
# with terrible hidden codes. remove them here
# i've seen a <98> character
karyotype = du.remove_control_characters(karyotype)
karyotype_id = None
if karyotype.strip() != '':
karyotype_id = \
'_'+re.sub('MONARCH:', '', self.make_id(karyotype))
if self.nobnodes:
karyotype_id = ':'+karyotype_id
# add karyotype as karyotype_variation_complement
gu.addIndividualToGraph(
g, karyotype_id, karyotype,
geno.genoparts['karyotype_variation_complement'])
# TODO break down the karyotype into parts
# and map into GENO. depends on #77
# place the karyotype in a location(s).
karyo_chrs = \
self._get_affected_chromosomes_from_karyotype(
karyotype)
for c in karyo_chrs:
chr_id = makeChromID(c, taxon, 'CHR')
# add an anonymous sequence feature,
# each located on chr
karyotype_feature_id = '-'.join((karyotype_id, c))
karyotype_feature_label = \
'some karyotype alteration on chr'+str(c)
f = Feature(
karyotype_feature_id, karyotype_feature_label,
geno.genoparts['sequence_alteration'])
f.addFeatureStartLocation(None, chr_id)
f.addFeatureToGraph(g)
f.loadAllProperties(g)
geno.addParts(
karyotype_feature_id, karyotype_id,
geno.object_properties['has_alternate_part'])
if gene != '':
vl = gene+'('+mutation+')'
# fix the variant_id so it's always in the same order
vids = variant_id.split(';')
variant_id = ';'.join(sorted(list(set(vids))))
if karyotype.strip() != '' \
and not self._is_normal_karyotype(karyotype):
mutation = mutation.strip()
gvc_id = karyotype_id
if variant_id != '':
gvc_id = '_' + variant_id.replace(';', '-') + '-' \
+ re.sub(r'\w*:', '', karyotype_id)
if mutation.strip() != '':
gvc_label = '; '.join((vl, karyotype))
else:
gvc_label = karyotype
elif variant_id.strip() != '':
gvc_id = '_' + variant_id.replace(';', '-')
gvc_label = vl
else:
# wildtype?
pass
if gvc_id is not None and gvc_id != karyotype_id \
and self.nobnodes:
gvc_id = ':'+gvc_id
示例2: _process_phenotype_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addParts [as 别名]
#.........这里部分代码省略.........
geno.genoparts['variant_locus'])
vl_set.add(vl_id)
if len(variants) == 1 and len(genes) == 1:
for gene in genes:
geno.addAlleleOfGene(vl_id, gene)
else:
geno.addAllele(vl_id, vl_symbol)
else: # len(vars) == 0
# it's just anonymous variants in some gene
for gene in genes:
vl_id = '_'+gene+'-VL'
vl_id = re.sub(r':', '', vl_id)
if self.nobnodes:
vl_id = ':'+vl_id
vl_symbol = self.id_label_hash[gene]+'<?>'
self.id_label_hash[vl_id] = vl_symbol
geno.addAllele(vl_id, vl_symbol,
geno.genoparts['variant_locus'])
geno.addGene(gene, self.id_label_hash[gene])
geno.addAlleleOfGene(vl_id, gene)
vl_set.add(vl_id)
# make the vslcs
vl_list = sorted(vl_set)
vslc_list = []
for vl in vl_list:
# for unknown zygosity
vslc_id = '_'+re.sub(r'^_', '', vl)+'U'
vslc_id = re.sub(r':', '', vslc_id)
if self.nobnodes:
vslc_id = ':' + vslc_id
vslc_label = self.id_label_hash[vl] + '/?'
self.id_label_hash[vslc_id] = vslc_label
vslc_list.append(vslc_id)
geno.addPartsToVSLC(
vslc_id, vl, None, geno.zygosity['indeterminate'],
geno.object_properties['has_alternate_part'], None)
gu.addIndividualToGraph(
g, vslc_id, vslc_label,
geno.genoparts['variant_single_locus_complement'])
if len(vslc_list) > 0:
if len(vslc_list) > 1:
gvc_id = '-'.join(vslc_list)
gvc_id = re.sub(r':', '', gvc_id)
if self.nobnodes:
gvc_id = ':'+gvc_id
gvc_label = \
'; '.join(self.id_label_hash[v] for v in vslc_list)
gu.addIndividualToGraph(
g, gvc_id, gvc_label,
geno.genoparts['genomic_variation_complement'])
for vslc_id in vslc_list:
geno.addVSLCtoParent(vslc_id, gvc_id)
else:
# the GVC == VSLC, so don't have to make an extra piece
gvc_id = vslc_list.pop()
gvc_label = self.id_label_hash[gvc_id]
genotype_label = gvc_label + ' [n.s.]'
bkgd_id = \
'_' + re.sub(r':', '', '-'.join(
(geno.genoparts['unspecified_genomic_background'],
s)))
genotype_id = '-'.join((gvc_id, bkgd_id))
if self.nobnodes:
bkgd_id = ':'+bkgd_id
geno.addTaxon(mouse_taxon, bkgd_id)
geno.addGenomicBackground(
bkgd_id, 'unspecified ('+s+')',
geno.genoparts['unspecified_genomic_background'],
"A placeholder for the " +
"unspecified genetic background for "+s)
geno.addGenomicBackgroundToGenotype(
bkgd_id, genotype_id,
geno.genoparts['unspecified_genomic_background'])
geno.addParts(
gvc_id, genotype_id,
geno.object_properties['has_alternate_part'])
geno.addGenotype(genotype_id, genotype_label)
gu.addTriple(
g, s, geno.object_properties['has_genotype'],
genotype_id)
else:
# logger.debug(
# "Strain %s is not making a proper genotype.", s)
pass
gu.loadProperties(
g, G2PAssoc.object_properties, G2PAssoc.OBJECTPROP)
gu.loadProperties(
g, G2PAssoc.datatype_properties, G2PAssoc.DATAPROP)
gu.loadProperties(
g, G2PAssoc.annotation_properties, G2PAssoc.ANNOTPROP)
gu.loadAllProperties(g)
logger.warning(
"The following gene symbols did not list identifiers: %s",
str(sorted(list(genes_with_no_ids))))
return
示例3: _process_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addParts [as 别名]
#.........这里部分代码省略.........
# used to generate a mouse strain.
# Terry sez: we use this clone ID to track
# ES cell -> mouse strain -> mouse phenotyping.
# The same ES clone may be used at multiple centers,
# so we have to concatenate the two to have a unique ID.
# some useful reading about generating mice from ES cells:
# http://ki.mit.edu/sbc/escell/services/details
# here, we'll make a genotype
# that derives from an ES cell with a given allele.
# the strain is not really attached to the colony.
# the colony/clone is reflective of the allele,
# with unknown zygosity
stem_cell_class = 'ERO:0002002'
gu.addIndividualToGraph(g, colony_id, colony, stem_cell_class)
# vslc of the colony has unknown zygosity
# note that we will define the allele
# (and its relationship to the marker, etc.) later
# FIXME is it really necessary to create this vslc
# when we always know it's unknown zygosity?
vslc_colony = \
'_'+allele_accession_id+geno.zygosity['indeterminate']
vslc_colony = re.sub(r':', '', vslc_colony)
if self.nobnodes:
vslc_colony = ':'+vslc_colony
vslc_colony_label = allele_symbol+'/<?>'
# for ease of reading, we make the colony genotype variables.
# in the future, it might be desired to keep the vslcs
colony_genotype_id = vslc_colony
colony_genotype_label = vslc_colony_label
geno.addGenotype(colony_genotype_id, colony_genotype_label)
geno.addParts(allele_accession_id, colony_genotype_id,
geno.object_properties['has_alternate_part'])
geno.addPartsToVSLC(
vslc_colony, allele_accession_id, None,
geno.zygosity['indeterminate'],
geno.object_properties['has_alternate_part'])
gu.addTriple(
g, colony_id,
geno.object_properties['has_genotype'],
colony_genotype_id)
# ########## BUILD THE ANNOTATED GENOTYPE ##########
# now, we'll build the genotype of the individual that derives
# from the colony/clone genotype that is attached to
# phenotype = colony_id + strain + zygosity + sex
# (and is derived from a colony)
# this is a sex-agnostic genotype
genotype_id = \
self.make_id(
(colony_id + phenotyping_center + zygosity +
strain_accession_id))
geno.addSequenceDerivesFrom(genotype_id, colony_id)
# build the VSLC of the sex-agnostic genotype
# based on the zygosity
allele1_id = allele_accession_id
allele2_id = allele2_rel = None
allele1_label = allele_symbol
allele2_label = '<?>'
# Making VSLC labels from the various parts,
# can change later if desired.
if zygosity == 'heterozygote':
示例4: _get_chrbands
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addParts [as 别名]
#.........这里部分代码省略.........
if len(parents) > 0:
mybands[chrom_num+band_num]['parent'] = \
chrom_num+parents[0]
else:
# TODO PYLINT why is 'parents'
# a list() a couple of lines up and a set() here?
parents = set()
# loop through the parents and add them to the hash
# add the parents to the graph, in hierarchical order
# TODO PYLINT Consider using enumerate
# instead of iterating with range and len
for i in range(len(parents)):
rti = getChrPartTypeByNotation(parents[i])
pnum = chrom_num+parents[i]
sta = int(start)
sto = int(stop)
if pnum not in mybands.keys():
# add the parental band to the hash
b = {'min': min(sta, sto),
'max': max(sta, sto),
'chr': chrom_num,
'ref': build_id,
'parent': None,
'stain': None,
'type': rti}
mybands[pnum] = b
else:
# band already in the hash means it's a grouping band
# need to update the min/max coords
b = mybands.get(pnum)
b['min'] = min(sta, sto, b['min'])
b['max'] = max(sta, sto, b['max'])
mybands[pnum] = b
# also, set the max for the chrom
c = mybands.get(chrom_num)
c['max'] = max(sta, sto, c['max'])
mybands[chrom_num] = c
# add the parent relationships to each
if i < len(parents) - 1:
mybands[pnum]['parent'] = chrom_num+parents[i+1]
else:
# add the last one (p or q usually)
# as attached to the chromosome
mybands[pnum]['parent'] = chrom_num
f.close() # end looping through file
# loop through the hash and add the bands to the graph
for b in mybands.keys():
myband = mybands.get(b)
band_class_id = makeChromID(b, taxon, 'CHR')
band_class_label = makeChromLabel(b, genome_label)
band_build_id = makeChromID(b, build_num, 'MONARCH')
band_build_label = makeChromLabel(b, build_num)
# the build-specific chrom
chrom_in_build_id = makeChromID(
myband['chr'], build_num, 'MONARCH')
# if the band's type is not an assembly component, then add the class
if myband['type'] != Feature.types['assembly_component']:
model.addClassToGraph(band_class_id,
band_class_label, myband['type'])
bfeature = Feature(self.graph, band_build_id, band_build_label,
band_class_id)
else:
bfeature = Feature(self.graph, band_build_id, band_build_label,
myband['type'])
if 'synonym' in myband:
model.addSynonym(band_build_id, myband['synonym'])
if myband['parent'] is None:
if myband['type'] == Feature.types['assembly_component']:
# since we likely don't know the chr,
# add it as a part of the build
geno.addParts(band_build_id, build_id)
elif myband['type'] == Feature.types['assembly_component']:
# geno.addParts(band_build_id, chrom_in_build_id)
parent_chrom_in_build = makeChromID(myband['parent'],
build_num, 'MONARCH')
bfeature.addSubsequenceOfFeature(parent_chrom_in_build)
# add the band as a feature
# (which also instantiates the owl:Individual)
bfeature.addFeatureStartLocation(myband['min'], chrom_in_build_id)
bfeature.addFeatureEndLocation(myband['max'], chrom_in_build_id)
if 'stain' in myband and myband['stain'] is not None:
# TODO 'has_staining_intensity' being dropped by MB
bfeature.addFeatureProperty(
Feature.properties['has_staining_intensity'],
myband['stain'])
# type the band as a faldo:Region directly (add_region=False)
# bfeature.setNoBNodes(self.nobnodes)
# to come when we merge in ZFIN.py
bfeature.addFeatureToGraph(False)
return
示例5: _process_phenotype_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addParts [as 别名]
#.........这里部分代码省略.........
limit is not None and reader.line_num > limit):
break
# now that we've collected all of the variant information, build it
# we don't know their zygosities
for s in self.strain_hash:
h = self.strain_hash.get(s)
variants = h['variants']
genes = h['genes']
vl_set = set()
# make variant loci for each gene
if len(variants) > 0:
for var in variants:
vl_id = var.strip()
vl_symbol = self.id_label_hash[vl_id]
geno.addAllele(
vl_id, vl_symbol, self.globaltt['variant_locus'])
vl_set.add(vl_id)
if len(variants) == 1 and len(genes) == 1:
for gene in genes:
geno.addAlleleOfGene(vl_id, gene)
else:
geno.addAllele(vl_id, vl_symbol)
else: # len(vars) == 0
# it's just anonymous variants in some gene
for gene in genes:
vl_id = '_:' + re.sub(r':', '', gene) + '-VL'
vl_symbol = self.id_label_hash[gene]+'<?>'
self.id_label_hash[vl_id] = vl_symbol
geno.addAllele(
vl_id, vl_symbol, self.globaltt['variant_locus'])
geno.addGene(gene, self.id_label_hash[gene])
geno.addAlleleOfGene(vl_id, gene)
vl_set.add(vl_id)
# make the vslcs
vl_list = sorted(vl_set)
vslc_list = []
for vl in vl_list:
# for unknown zygosity
vslc_id = re.sub(r'^_', '', vl)+'U'
vslc_id = re.sub(r':', '', vslc_id)
vslc_id = '_:' + vslc_id
vslc_label = self.id_label_hash[vl] + '/?'
self.id_label_hash[vslc_id] = vslc_label
vslc_list.append(vslc_id)
geno.addPartsToVSLC(
vslc_id, vl, None, self.globaltt['indeterminate'],
self.globaltt['has_variant_part'], None)
model.addIndividualToGraph(
vslc_id, vslc_label,
self.globaltt['variant single locus complement'])
if len(vslc_list) > 0:
if len(vslc_list) > 1:
gvc_id = '-'.join(vslc_list)
gvc_id = re.sub(r'_|:', '', gvc_id)
gvc_id = '_:'+gvc_id
gvc_label = '; '.join(self.id_label_hash[v] for v in vslc_list)
model.addIndividualToGraph(
gvc_id, gvc_label,
self.globaltt['genomic_variation_complement'])
for vslc_id in vslc_list:
geno.addVSLCtoParent(vslc_id, gvc_id)
else:
# the GVC == VSLC, so don't have to make an extra piece
gvc_id = vslc_list.pop()
gvc_label = self.id_label_hash[gvc_id]
genotype_label = gvc_label + ' [n.s.]'
bkgd_id = re.sub(
r':', '', '-'.join((
self.globaltt['unspecified_genomic_background'], s)))
genotype_id = '-'.join((gvc_id, bkgd_id))
bkgd_id = '_:' + bkgd_id
geno.addTaxon(mouse_taxon, bkgd_id)
geno.addGenomicBackground(
bkgd_id, 'unspecified (' + s + ')',
self.globaltt['unspecified_genomic_background'],
"A placeholder for the unspecified genetic background for " + s)
geno.addGenomicBackgroundToGenotype(
bkgd_id, genotype_id,
self.globaltt['unspecified_genomic_background'])
geno.addParts(
gvc_id, genotype_id, self.globaltt['has_variant_part'])
geno.addGenotype(genotype_id, genotype_label)
graph.addTriple(
s, self.globaltt['has_genotype'], genotype_id)
else:
# LOG.debug(
# "Strain %s is not making a proper genotype.", s)
pass
LOG.warning(
"The following gene symbols did not list identifiers: %s",
str(sorted(list(genes_with_no_ids))))
LOG.error(
'%i symbols given are missing their gene identifiers',
len(genes_with_no_ids))
return