本文整理汇总了 Python 中 dipper.models.Genotype.Genotype.addChromosomeClass 方法的典型用法代码示例。如果您正苦于以下问题：Python Genotype.addChromosomeClass 方法的具体用法？Python Genotype.addChromosomeClass 怎么用？Python Genotype.addChromosomeClass 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dipper.models.Genotype.Genotype 的用法示例。
在下文中一共展示了 Genotype.addChromosomeClass 方法的 8 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _get_variants
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
#.........这里部分代码省略.........
continue
# TODO may need to switch on assembly to create correct
# assembly/build identifiers
build_id = ':'.join(('NCBIGenome', assembly))
# make the reference genome build
geno.addReferenceGenome(build_id, assembly, tax_id)
allele_type_id = self._map_type_of_allele(allele_type)
bandinbuild_id = None
if str(chr) == '':
# check cytogenetic location
if str(cytogenetic_loc).strip() != '':
# use cytogenetic location to get the approximate location
# oddly, they still put an assembly number even when
# there's no numeric location
if not re.search(r'-', str(cytogenetic_loc)):
band_id = makeChromID(
re.split(r'-', str(cytogenetic_loc)),
tax_num, 'CHR')
geno.addChromosomeInstance(
cytogenetic_loc, build_id, assembly, band_id)
bandinbuild_id = makeChromID(
re.split(r'-', str(cytogenetic_loc)),
assembly, 'MONARCH')
else:
# can't deal with ranges yet
pass
else:
# add the human chromosome class to the graph,
# and add the build-specific version of it
chr_id = makeChromID(str(chr), tax_num, 'CHR')
geno.addChromosomeClass(str(chr), tax_id, tax_label)
geno.addChromosomeInstance(
str(chr), build_id, assembly, chr_id)
chrinbuild_id = makeChromID(str(chr), assembly, 'MONARCH')
seqalt_id = ':'.join(('ClinVarVariant', variant_num))
gene_id = None
# they use -1 to indicate unknown gene
if str(gene_num) != '-1' and str(gene_num) != 'more than 10':
if re.match(r'^Gene:', gene_num):
gene_num = "NCBI" + gene_num
else:
gene_id = ':'.join(('NCBIGene', str(gene_num)))
# FIXME there are some "variants" that are actually haplotypes
# probably will get taken care of when we switch to processing
# the xml for example, variant_num = 38562
# but there's no way to tell if it's a haplotype
# in the csv data so the dbsnp or dbvar
# should probably be primary,
# and the variant num be the vslc,
# with each of the dbsnps being added to it
# TODO clinical significance needs to be mapped to
# a list of terms
# first, make the variant:
f = Feature(seqalt_id, allele_name, allele_type_id)
if start != '-' and start.strip() != '':
f.addFeatureStartLocation(start, chrinbuild_id)
if stop != '-' and stop.strip() != '':
f.addFeatureEndLocation(stop, chrinbuild_id)
示例2: _get_chrbands
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
def _get_chrbands(self, limit, taxon):
"""
For the given taxon, it will fetch the chr band file.
We will not deal with the coordinate information with this parser.
Here, we only are concerned with building the partonomy.
:param limit:
:return:
"""
line_counter = 0
myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
logger.info("Processing Chr bands from FILE: %s", myfile)
geno = Genotype(self.graph)
# build the organism's genome from the taxon
genome_label = self.files[taxon]['genome_label']
taxon_id = 'NCBITaxon:'+taxon
# add the taxon as a class. adding the class label elsewhere
self.gu.addClassToGraph(self.graph, taxon_id, None)
self.gu.addSynonym(self.graph, taxon_id, genome_label)
self.gu.loadObjectProperties(self.graph, Feature.object_properties)
genome_id = geno.makeGenomeID(taxon_id)
geno.addGenome(taxon_id, genome_label)
self.gu.addOWLPropertyClassRestriction(
self.graph, genome_id, Genotype.object_properties['in_taxon'],
taxon_id)
with gzip.open(myfile, 'rb') as f:
for line in f:
# skip comments
line = line.decode().strip()
if re.match(r'^#', line):
continue
# chr13 4500000 10000000 p12 stalk
(chrom, start, stop, band, rtype) = line.split('\t')
line_counter += 1
# NOTE
# some less-finished genomes have placed and unplaced scaffolds
# * Placed scaffolds:
# Scaffold has an oriented location within a chromosome.
# * Unlocalized scaffolds:
# scaffold's chromosome is known,
# scaffold's position, orientation or both is not known.
# *Unplaced scaffolds:
# it is not known which chromosome the scaffold belongs to.
# find out if the thing is a full on chromosome, or a scaffold:
# ex: unlocalized scaffold: chr10_KL568008v1_random
# ex: unplaced scaffold: chrUn_AABR07022428v1
placed_scaffold_pattern = r'chr(\d+|X|Y|Z|W|MT|M)'
# TODO unused
# unlocalized_scaffold_pattern = \
# placed_scaffold_pattern + r'_(\w+)_random'
# unplaced_scaffold_pattern = r'chrUn_(\w+)'
m = re.match(placed_scaffold_pattern+r'$', chrom)
if m is not None and len(m.groups()) == 1:
# the chromosome is the first match of the pattern
# ch = m.group(1) # TODO unused
pass
else:
# let's skip over anything that isn't a placed_scaffold
# at the class level
logger.info("Skipping non-placed chromosome %s", chrom)
continue
# the chrom class, taxon as the reference
cclassid = makeChromID(chrom, taxon, 'CHR')
# add the chromosome as a class
geno.addChromosomeClass(chrom, taxon_id, genome_label)
self.gu.addOWLPropertyClassRestriction(
self.graph, cclassid,
self.gu.object_properties['member_of'], genome_id)
# add the band(region) as a class
maplocclass_id = cclassid+band
maplocclass_label = makeChromLabel(chrom+band, genome_label)
if band is not None and band.strip() != '':
region_type_id = self.map_type_of_region(rtype)
self.gu.addClassToGraph(
self.graph, maplocclass_id, maplocclass_label,
region_type_id)
else:
region_type_id = Feature.types['chromosome']
# add the staining intensity of the band
if re.match(r'g(neg|pos|var)', rtype):
if region_type_id in [
Feature.types['chromosome_band'],
Feature.types['chromosome_subband']]:
stain_type = Feature.types.get(rtype)
if stain_type is not None:
self.gu.addOWLPropertyClassRestriction(
self.graph, maplocclass_id,
Feature.properties['has_staining_intensity'],
#.........这里部分代码省略.........
示例3: _get_gene_info
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
#.........这里部分代码省略.........
# TODO might have to figure out if things aren't genes, and make them individuals
gu.addClassToGraph(g, gene_id, label, gene_type_id, desc)
# we have to do special things here for genes, because they're classes not individuals
# f = Feature(gene_id,label,gene_type_id,desc)
if name != '-':
gu.addSynonym(g, gene_id, name)
if synonyms.strip() != '-':
for s in synonyms.split('|'):
gu.addSynonym(g, gene_id, s.strip(), Assoc.annotation_properties['hasRelatedSynonym'])
if other_designations.strip() != '-':
for s in other_designations.split('|'):
gu.addSynonym(g, gene_id, s.strip(), Assoc.annotation_properties['hasRelatedSynonym'])
# deal with the xrefs
# MIM:614444|HGNC:HGNC:16851|Ensembl:ENSG00000136828|HPRD:11479|Vega:OTTHUMG00000020696
if xrefs.strip() != '-':
for r in xrefs.strip().split('|'):
fixedr = self._cleanup_id(r)
if fixedr is not None and fixedr.strip() != '':
if re.match('HPRD', fixedr):
# proteins are not == genes.
gu.addTriple(g, gene_id, self.properties['has_gene_product'], fixedr)
else:
# skip some of these for now
if fixedr.split(':')[0] not in ['Vega', 'IMGT/GENE-DB']:
gu.addEquivalentClass(g, gene_id, fixedr)
# edge cases of id | symbol | chr | map_loc:
# 263 AMD1P2 X|Y with Xq28 and Yq12
# 438 ASMT X|Y with Xp22.3 or Yp11.3 # in PAR
# 419 ART3 4 with 4q21.1|4p15.1-p14 # no idea why there's two bands listed - possibly 2 assemblies
# 28227 PPP2R3B X|Y Xp22.33; Yp11.3 # in PAR
# 619538 OMS 10|19|3 10q26.3;19q13.42-q13.43;3p25.3 #this is of "unknown" type == susceptibility
# 101928066 LOC101928066 1|Un - # unlocated scaffold
# 11435 Chrna1 2 2 C3|2 43.76 cM # mouse --> 2C3
# 11548 Adra1b 11 11 B1.1|11 25.81 cM # mouse --> 11B1.1
# 11717 Ampd3 7 7 57.85 cM|7 E2-E3 # mouse
# 14421 B4galnt1 10 10 D3|10 74.5 cM # mouse
# 323212 wu:fb92e12 19|20 - # fish
# 323368 ints10 6|18 - # fish
# 323666 wu:fc06e02 11|23 - # fish
# feel that the chr placement can't be trusted in this table when there is > 1 listed
# with the exception of human X|Y, i will only take those that align to one chr
# FIXME remove the chr mapping below when we pull in the genomic coords
if str(chr) != '-' and str(chr) != '':
if re.search('\|', str(chr)) and str(chr) not in ['X|Y','X; Y']:
# this means that there's uncertainty in the mapping. skip it
# TODO we'll need to figure out how to deal with >1 loc mapping
logger.info('%s is non-uniquely mapped to %s. Skipping for now.', gene_id, str(chr))
continue
# X|Y Xp22.33;Yp11.3
# if (not re.match('(\d+|(MT)|[XY]|(Un)$',str(chr).strip())):
# print('odd chr=',str(chr))
if str(chr) == 'X; Y':
chr = 'X|Y' # rewrite the PAR regions for processing
# do this in a loop to allow PAR regions like X|Y
for c in re.split('\|',str(chr)) :
geno.addChromosomeClass(c, tax_id, None) # assume that the chromosome label will get added elsewhere
mychrom = makeChromID(c, tax_num, 'CHR')
mychrom_syn = makeChromLabel(c, tax_num) # temporarily use the taxnum for the disambiguating label
gu.addSynonym(g, mychrom, mychrom_syn)
band_match = re.match('[0-9A-Z]+[pq](\d+)?(\.\d+)?$', map_loc)
if band_match is not None and len(band_match.groups()) > 0:
# if tax_num != '9606':
# continue
# this matches the regular kind of chrs, so make that kind of band
# not sure why this matches? chrX|Y or 10090chr12|Un"
# TODO we probably need a different regex per organism
# the maploc_id already has the numeric chromosome in it, strip it first
bid = re.sub('^'+c, '', map_loc)
maploc_id = makeChromID(c+bid, tax_num, 'CHR') # the generic location (no coordinates)
# print(map_loc,'-->',bid,'-->',maploc_id)
band = Feature(maploc_id, None, None) # Assume it's type will be added elsewhere
band.addFeatureToGraph(g)
# add the band as the containing feature
gu.addTriple(g, gene_id, Feature.object_properties['is_subsequence_of'], maploc_id)
else:
# TODO handle these cases
# examples are: 15q11-q22, Xp21.2-p11.23, 15q22-qter, 10q11.1-q24,
## 12p13.3-p13.2|12p13-p12, 1p13.3|1p21.3-p13.1, 12cen-q21, 22q13.3|22q13.3
logger.debug('not regular band pattern for %s: %s', gene_id, map_loc)
# add the gene as a subsequence of the chromosome
gu.addTriple(g, gene_id, Feature.object_properties['is_subsequence_of'], mychrom)
geno.addTaxon(tax_id, gene_id)
if not self.testMode and limit is not None and line_counter > limit:
break
gu.loadProperties(g, Feature.object_properties, gu.OBJPROP)
gu.loadProperties(g, Feature.data_properties, gu.DATAPROP)
gu.loadProperties(g, Genotype.object_properties, gu.OBJPROP)
gu.loadAllProperties(g)
return
示例4: _transform_entry
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
#.........这里部分代码省略.........
feature_id = omimid
is_gene = True
else:
# 158900 falls into this category
feature_id = self._make_anonymous_feature(str(omimnum))
if abbrev is not None:
feature_label = abbrev
omimtype = \
Genotype.genoparts[
'heritable_phenotypic_marker']
if feature_id is not None:
if 'comments' in genemap:
# add a comment to this feature
comment = genemap['comments']
if comment.strip() != '':
model.addDescription(feature_id, comment)
if 'cytoLocation' in genemap:
cytoloc = genemap['cytoLocation']
# parse the cytoloc.
# add this omim thing as
# a subsequence of the cytofeature
# 18p11.3-p11.2
# FIXME
# add the other end of the range,
# but not sure how to do that
# not sure if saying subsequence of feature
# is the right relationship
f = Feature(g, feature_id, feature_label, omimtype)
if 'chromosomeSymbol' in genemap:
chrom_num = str(genemap['chromosomeSymbol'])
chrom = makeChromID(chrom_num, tax_num, 'CHR')
geno.addChromosomeClass(
chrom_num, tax_id, tax_label)
# add the positional information, if available
fstart = fend = -1
if 'chromosomeLocationStart' in genemap:
fstart = genemap['chromosomeLocationStart']
if 'chromosomeLocationEnd' in genemap:
fend = genemap['chromosomeLocationEnd']
if fstart >= 0:
# make the build-specific chromosome
chrom_in_build = makeChromID(chrom_num,
build_num,
'MONARCH')
# then, add the chromosome instance
# (from the given build)
geno.addChromosomeInstance(
chrom_num, build_id, build_num, chrom)
if omimtype == \
Genotype.genoparts[
'heritable_phenotypic_marker']:
postypes = [Feature.types['FuzzyPosition']]
else:
postypes = None
# NOTE that no strand information
# is available in the API
f.addFeatureStartLocation(
fstart, chrom_in_build, None, postypes)
if fend >= 0:
f.addFeatureEndLocation(
fend, chrom_in_build, None, postypes)
if fstart > fend:
logger.info(
示例5: _get_chrbands
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
def _get_chrbands(self, limit, taxon):
"""
:param limit:
:return:
"""
model = Model(self.graph)
# TODO PYLINT figure out what limit was for and why it is unused
line_counter = 0
myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
logger.info("Processing Chr bands from FILE: %s", myfile)
geno = Genotype(self.graph)
monochrom = Monochrom(self.graph_type, self.are_bnodes_skized)
# used to hold band definitions for a chr
# in order to compute extent of encompassing bands
mybands = {}
# build the organism's genome from the taxon
genome_label = self.files[taxon]['genome_label']
taxon_id = 'NCBITaxon:'+taxon
# add the taxon as a class. adding the class label elsewhere
model.addClassToGraph(taxon_id, None)
model.addSynonym(taxon_id, genome_label)
geno.addGenome(taxon_id, genome_label)
# add the build and the taxon it's in
build_num = self.files[taxon]['build_num']
build_id = 'UCSC:'+build_num
geno.addReferenceGenome(build_id, build_num, taxon_id)
# process the bands
with gzip.open(myfile, 'rb') as f:
for line in f:
# skip comments
line = line.decode().strip()
if re.match('^#', line):
continue
# chr13 4500000 10000000 p12 stalk
(scaffold, start, stop, band_num, rtype) = line.split('\t')
line_counter += 1
# NOTE some less-finished genomes have
# placed and unplaced scaffolds
# * Placed scaffolds:
# the scaffolds have been placed within a chromosome.
# * Unlocalized scaffolds:
# although the chromosome within which the scaffold occurs
# is known, the scaffold's position or orientation
# is not known.
# * Unplaced scaffolds:
# it is not known which chromosome the scaffold belongs to
#
# find out if the thing is a full on chromosome, or a scaffold:
# ex: unlocalized scaffold: chr10_KL568008v1_random
# ex: unplaced scaffold: chrUn_AABR07022428v1
placed_scaffold_pattern = r'(chr(?:\d+|X|Y|Z|W|M))'
unlocalized_scaffold_pattern = \
placed_scaffold_pattern+r'_(\w+)_random'
unplaced_scaffold_pattern = r'chr(Un(?:_\w+)?)'
m = re.match(placed_scaffold_pattern+r'$', scaffold)
if m is not None and len(m.groups()) == 1:
# the chromosome is the first match of the pattern
chrom_num = m.group(1)
else:
# skip over anything that isn't a placed_scaffold
# at the class level
logger.info("Found non-placed chromosome %s", scaffold)
chrom_num = None
m_chr_unloc = re.match(unlocalized_scaffold_pattern, scaffold)
m_chr_unplaced = re.match(unplaced_scaffold_pattern, scaffold)
scaffold_num = None
if m:
pass
elif m_chr_unloc is not None and\
len(m_chr_unloc.groups()) == 2:
chrom_num = m_chr_unloc.group(1)
scaffold_num = chrom_num+'_'+m_chr_unloc.group(2)
elif m_chr_unplaced is not None and\
len(m_chr_unplaced.groups()) == 1:
scaffold_num = m_chr_unplaced.group(1)
else:
logger.error(
"There's a chr pattern that we aren't matching: %s",
scaffold)
if chrom_num is not None:
# the chrom class (generic) id
chrom_class_id = makeChromID(chrom_num, taxon, 'CHR')
# first, add the chromosome class (in the taxon)
geno.addChromosomeClass(
chrom_num, taxon_id, self.files[taxon]['genome_label'])
#.........这里部分代码省略.........
示例6: _get_gene_info
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
#.........这里部分代码省略.........
if xrefs.strip() != '-':
self._add_gene_equivalencies(xrefs, gene_id, tax_num)
# edge cases of id | symbol | chr | map_loc:
# 263 AMD1P2 X|Y with Xq28 and Yq12
# 438 ASMT X|Y with Xp22.3 or Yp11.3 # in PAR
# no idea why there's two bands listed - possibly 2 assemblies
# 419 ART3 4 with 4q21.1|4p15.1-p14
# 28227 PPP2R3B X|Y Xp22.33; Yp11.3 # in PAR
# this is of "unknown" type == susceptibility
# 619538 OMS 10|19|3 10q26.3;19q13.42-q13.43;3p25.3
# unlocated scaffold
# 101928066 LOC101928066 1|Un -\
# mouse --> 2C3
# 11435 Chrna1 2 2 C3|2 43.76 cM
# mouse --> 11B1.1
# 11548 Adra1b 11 11 B1.1|11 25.81 cM
# 11717 Ampd3 7 7 57.85 cM|7 E2-E3 # mouse
# 14421 B4galnt1 10 10 D3|10 74.5 cM # mouse
# 323212 wu:fb92e12 19|20 - # fish
# 323368 ints10 6|18 - # fish
# 323666 wu:fc06e02 11|23 - # fish
# feel that the chr placement can't be trusted in this table
# when there is > 1 listed
# with the exception of human X|Y,
# we will only take those that align to one chr
# FIXME remove the chr mapping below
# when we pull in the genomic coords
if str(chrom) != '-' and str(chrom) != '':
if re.search(r'\|', str(chrom)) and \
str(chrom) not in ['X|Y', 'X; Y']:
# means that there's uncertainty in the mapping.
# so skip it
# TODO we'll need to figure out how to deal with
# >1 loc mapping
logger.info(
'%s is non-uniquely mapped to %s.' +
' Skipping for now.',
gene_id, str(chr))
continue
# X|Y Xp22.33;Yp11.3
# if(not re.match(
# r'(\d+|(MT)|[XY]|(Un)$',str(chr).strip())):
# print('odd chr=',str(chr))
if str(chrom) == 'X; Y':
chrom = 'X|Y' # rewrite the PAR regions for processing
# do this in a loop to allow PAR regions like X|Y
for c in re.split(r'\|', str(chrom)):
# assume that the chromosome label is added elsewhere
geno.addChromosomeClass(c, tax_id, None)
mychrom = makeChromID(c, tax_num, 'CHR')
# temporarily use taxnum for the disambiguating label
mychrom_syn = makeChromLabel(c, tax_num)
model.addSynonym(mychrom, mychrom_syn)
band_match = re.match(
r'[0-9A-Z]+[pq](\d+)?(\.\d+)?$', map_loc)
if band_match is not None and \
len(band_match.groups()) > 0:
# if tax_num != '9606':
# continue
# this matches the regular kind of chrs,
# so make that kind of band
# not sure why this matches?
# chrX|Y or 10090chr12|Un"
# TODO we probably need a different regex
# per organism
# the maploc_id already has the numeric chromosome
# in it, strip it first
bid = re.sub(r'^'+c, '', map_loc)
# the generic location (no coordinates)
maploc_id = makeChromID(c+bid, tax_num, 'CHR')
# print(map_loc,'-->',bid,'-->',maploc_id)
# Assume its type will be added elsewhere
band = Feature(g, maploc_id, None, None)
band.addFeatureToGraph()
# add the band as the containing feature
g.addTriple(
gene_id,
Feature.object_properties['is_subsequence_of'],
maploc_id)
else:
# TODO handle these cases: examples are:
# 15q11-q22,Xp21.2-p11.23,15q22-qter,10q11.1-q24,
# 12p13.3-p13.2|12p13-p12,1p13.3|1p21.3-p13.1,
# 12cen-q21,22q13.3|22q13.3
logger.debug(
'not regular band pattern for %s: %s',
gene_id, map_loc)
# add the gene as a subsequence of the chromosome
g.addTriple(
gene_id,
Feature.object_properties['is_subsequence_of'],
mychrom)
geno.addTaxon(tax_id, gene_id)
return
示例7: _process_all
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
#.........这里部分代码省略.........
if 'includedTitles' in titles:
other_labels += self._get_alt_labels(titles['includedTitles'])
# add synonyms of alternate labels
# preferredTitle": "PFEIFFER SYNDROME",
# "alternativeTitles": "ACROCEPHALOSYNDACTYLY, TYPE V; ACS5;;\nACS V;;\nNOACK SYNDROME",
# "includedTitles": "CRANIOFACIAL-SKELETAL-DERMATOLOGIC DYSPLASIA, INCLUDED"
# remove the abbreviation (comes after the ;) from the preferredTitle, and add it as a synonym
abbrev = None
if len(re.split(';', label)) > 1:
abbrev = (re.split(';', label)[1].strip())
newlabel = self._cleanup_label(label)
description = self._get_description(e['entry'])
omimid = 'OMIM:'+str(omimnum)
if e['entry']['status'] == 'removed':
gu.addDeprecatedClass(g, omimid)
else:
omimtype = self._get_omimtype(e['entry'])
# this uses our cleaned-up label
gu.addClassToGraph(g, omimid, newlabel, omimtype)
# add the original OMIM label as a synonym
gu.addSynonym(g, omimid, label)
# add the alternate labels and includes as synonyms
for l in other_labels:
gu.addSynonym(g, omimid, l)
# for OMIM, we're adding the description as a definition
gu.addDefinition(g, omimid, description)
if abbrev is not None:
gu.addSynonym(g, omimid, abbrev)
# if this is a genetic locus (but not sequenced) then add the chrom loc info
if omimtype == Genotype.genoparts['biological_region']:
if 'geneMapExists' in e['entry'] and e['entry']['geneMapExists']:
genemap = e['entry']['geneMap']
if 'cytoLocation' in genemap:
cytoloc = genemap['cytoLocation']
# parse the cytoloc. add this omim thing as a subsequence of the cytofeature
# 18p11.3-p11.2
# for now, just take the first one
# FIXME add the other end of the range, but not sure how to do that
# not sure if saying subsequence of feature is the right relationship
cytoloc = cytoloc.split('-')[0]
f = Feature(omimid, None, None)
if 'chromosome' in genemap:
chrom = makeChromID(str(genemap['chromosome']), tax_num, 'CHR')
geno.addChromosomeClass(str(genemap['chromosome']), tax_id, tax_label)
loc = makeChromID(cytoloc, tax_num, 'CHR')
gu.addClassToGraph(g, loc, cytoloc) # this is the chr band
f.addSubsequenceOfFeature(g, loc)
f.addFeatureToGraph(g)
pass
# check if moved, if so, make it deprecated and replaced/consider class to the other thing(s)
# some entries have been moved to multiple other entries and use the joining raw word "and"
# 612479 is movedto: "603075 and 603029" OR
# others use a comma-delimited list, like:
# 610402 is movedto: "609122,300870"
if e['entry']['status'] == 'moved':
if re.search('and', str(e['entry']['movedTo'])):
# split the movedTo entry on 'and'
newids = re.split('and', str(e['entry']['movedTo']))
elif len(str(e['entry']['movedTo']).split(',')) > 0:
# split on the comma
newids = str(e['entry']['movedTo']).split(',')
else:
# make a list of one
newids = [str(e['entry']['movedTo'])]
# cleanup whitespace and add OMIM prefix to numeric portion
fixedids = []
for i in newids:
fixedids.append('OMIM:'+i.strip())
gu.addDeprecatedClass(g, omimid, fixedids)
self._get_phenotypicseries_parents(e['entry'], g)
self._get_mappedids(e['entry'], g)
self._get_pubs(e['entry'], g)
self._get_process_allelic_variants(e['entry'], g)
### end iterating over batch of entries
# can't have more than 4 req per sec,
# so wait the remaining time, if necessary
dt = datetime.now() - request_time
rem = 0.25 - dt.total_seconds()
if rem > 0:
logger.info("waiting %d sec", rem)
time.sleep(rem/1000)
gu.loadAllProperties(g)
return
示例8: _get_chrbands
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addChromosomeClass [as 别名]
def _get_chrbands(self, limit, taxon):
"""
For the given taxon, it will fetch the chr band file.
We will not deal with the coordinate information with this parser.
Here, we only are concerned with building the partonomy.
:param limit:
:return:
"""
model = Model(self.graph)
line_counter = 0
myfile = '/'.join((self.rawdir, self.files[taxon]['file']))
LOG.info("Processing Chr bands from FILE: %s", myfile)
geno = Genotype(self.graph)
# build the organism's genome from the taxon
genome_label = self.files[taxon]['genome_label']
taxon_id = 'NCBITaxon:' + taxon
# add the taxon as a class. adding the class label elsewhere
model.addClassToGraph(taxon_id, None)
model.addSynonym(taxon_id, genome_label)
genome_id = geno.makeGenomeID(taxon_id)
geno.addGenome(taxon_id, genome_label)
model.addOWLPropertyClassRestriction(
genome_id, self.globaltt['in taxon'], taxon_id)
placed_scaffold_pattern = r'chr(\d+|X|Y|Z|W|MT|M)'
# currently unused patterns
# unlocalized_scaffold_pattern = placed_scaffold_pattern + r'_(\w+)_random'
# unplaced_scaffold_pattern = r'chrUn_(\w+)'
col = ['chrom', 'start', 'stop', 'band', 'rtype']
with gzip.open(myfile, 'rb') as reader:
for line in reader:
line_counter += 1
# skip comments
line = line.decode().strip()
if line[0] == '#':
continue
# chr13 4500000 10000000 p12 stalk
row = line.split('\t')
chrom = row[col.index('chrom')]
band = row[col.index('band')]
rtype = row[col.index('rtype')]
# NOTE
# some less-finished genomes have placed and unplaced scaffolds
# * Placed scaffolds:
# Scaffold has an oriented location within a chromosome.
# * Unlocalized scaffolds:
# scaffold's chromosome is known,
# scaffold's position, orientation or both is not known.
# *Unplaced scaffolds:
# it is not known which chromosome the scaffold belongs to.
# find out if the thing is a full on chromosome, or a scaffold:
# ex: unlocalized scaffold: chr10_KL568008v1_random
# ex: unplaced scaffold: chrUn_AABR07022428v1
mch = re.match(placed_scaffold_pattern+r'$', chrom)
if mch is not None and len(mch.groups()) == 1:
# the chromosome is the first match of the pattern
# chrom = m.group(1) # TODO unused
pass
else:
# let's skip over anything that isn't a placed_scaffold
LOG.info("Skipping non-placed chromosome %s", chrom)
continue
# the chrom class, taxon as the reference
cclassid = makeChromID(chrom, taxon, 'CHR')
# add the chromosome as a class
geno.addChromosomeClass(chrom, taxon_id, genome_label)
model.addOWLPropertyClassRestriction(
cclassid, self.globaltt['member of'], genome_id)
# add the band(region) as a class
maplocclass_id = cclassid+band
maplocclass_label = makeChromLabel(chrom+band, genome_label)
if band is not None and band.strip() != '':
region_type_id = self.map_type_of_region(rtype)
model.addClassToGraph(
maplocclass_id, maplocclass_label,
region_type_id)
else:
region_type_id = self.globaltt['chromosome']
# add the staining intensity of the band
if re.match(r'g(neg|pos|var)', rtype):
if region_type_id in [
self.globaltt['chromosome_band'],
self.globaltt['chromosome_subband']]:
stain_type = self.resolve(rtype)
if stain_type is not None:
model.addOWLPropertyClassRestriction(
maplocclass_id,
self.globaltt['has_sequence_attribute'],
self.resolve(rtype))
else:
# usually happens if it's a chromosome because
# they don't actually have banding info
#.........这里部分代码省略.........