本文整理汇总了Python中dipper.models.Genotype.Genotype.addSequenceAlteration方法的典型用法代码示例。如果您正苦于以下问题：Python Genotype.addSequenceAlteration方法的具体用法？Python Genotype.addSequenceAlteration怎么用？Python Genotype.addSequenceAlteration使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Genotype.Genotype的用法示例。
在下文中一共展示了Genotype.addSequenceAlteration方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_allele_phenotype
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlteration [as 别名]
def process_allele_phenotype(self, limit=None):
"""
This file compactly lists variant to phenotype associations,
such that in a single row, there may be >1 variant listed
per phenotype and paper. This indicates that each variant is
individually assocated with the given phenotype,
as listed in 1+ papers.
(Not that the combination of variants is producing the phenotype.)
:param limit:
:return:
"""
raw = '/'.join((self.rawdir, self.files['allele_pheno']['file']))
if self.testMode:
g = self.testgraph
else:
g = self.graph
# gu = GraphUtils(curie_map.get()) # TODO unused
logger.info("Processing Allele phenotype associations")
line_counter = 0
geno = Genotype(g)
with open(raw, 'r') as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
if re.match(r'!', ''.join(row)): # header
continue
line_counter += 1
(db, gene_num, gene_symbol, is_not, phenotype_id, ref,
eco_symbol, with_or_from, aspect, gene_name, gene_synonym,
gene_class, taxon, date, assigned_by, blank, blank2) = row
if self.testMode and gene_num not in self.test_ids['gene']:
continue
# TODO add NOT phenotypes
if is_not == 'NOT':
continue
eco_id = None
if eco_symbol == 'IMP':
eco_id = 'ECO:0000015'
elif eco_symbol.strip() != '':
logger.warning(
"Encountered an ECO code we don't have: %s",
eco_symbol)
# according to the GOA spec, persons are not allowed to be
# in the reference column, therefore they the variant and
# persons are swapped between the reference and with column.
# we unswitch them here.
temp_var = temp_ref = None
if re.search(r'WBVar|WBRNAi', ref):
temp_var = ref
# move the paper from the with column into the ref
if re.search(r'WBPerson', with_or_from):
temp_ref = with_or_from
if temp_var is not None or temp_ref is not None:
with_or_from = temp_var
ref = temp_ref
allele_list = re.split(r'\|', with_or_from)
if len(allele_list) == 0:
logger.error(
"Missing alleles from phenotype assoc at line %d",
line_counter)
continue
else:
for a in allele_list:
allele_num = re.sub(r'WB:', '', a.strip())
allele_id = 'WormBase:'+allele_num
gene_id = 'WormBase:'+gene_num
if re.search(r'WBRNAi', allele_id):
# make the reagent-targeted gene,
# & annotate that instead of the RNAi item directly
rnai_num = re.sub(r'WormBase:', '', allele_id)
rnai_id = allele_id
rtg_id = self.make_reagent_targeted_gene_id(
gene_num, rnai_num, self.nobnodes)
geno.addReagentTargetedGene(
rnai_id, 'WormBase:'+gene_num, rtg_id)
geno.addGeneTargetingReagent(
rnai_id, None, geno.genoparts['RNAi_reagent'],
gene_id)
allele_id = rtg_id
elif re.search(r'WBVar', allele_id):
# this may become deprecated by using wormmine
# make the allele to gene relationship
# the WBVars are really sequence alterations
# the public name will come from elsewhere
geno.addSequenceAlteration(allele_id, None)
vl_id = '_'+'-'.join((gene_num, allele_num))
if self.nobnodes:
vl_id = ':'+vl_id
geno.addSequenceAlterationToVariantLocus(
#.........这里部分代码省略.........
示例2: _parse_patient_variants
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlteration [as 别名]
def _parse_patient_variants(self, file):
    """
    Add every patient's variants to the graph as sequence alterations.

    The variant file is converted to a ``{patient: {variant_id: variant}}``
    mapping.  For each patient an intrinsic-genotype node is created and
    linked to the patient; for each of the patient's variants a sequence
    alteration node is added, labelled (unless a label already exists in
    the graph), given a taxon triple, attached to the intrinsic genotype
    and, when an rs id is present, linked to the matching dbSNP curie.

    NOTE(review): the nesting below was reconstructed from a listing whose
    indentation had been stripped -- confirm against the upstream source
    that ``_add_variant_sameas_relationships`` is meant to run once, after
    the patient loop.

    :param file: file handler, handed to ``_convert_variant_file_to_dict``
    :return: None
    """
    patient_var_map = self._convert_variant_file_to_dict(file)
    gene_coordinate_map = self._parse_gene_coordinates(
        self.map_files['gene_coord_map'])
    rs_map = self._parse_rs_map_file(self.map_files['dbsnp_map'])

    genotype = Genotype(self.graph)
    model = Model(self.graph)

    self._add_variant_gene_relationship(patient_var_map, gene_coordinate_map)

    for patient in patient_var_map:
        patient_curie = ':{0}'.format(patient)

        # make intrinsic genotype for each patient
        intrinsic_geno_bnode = self.make_id(
            "{0}-intrinsic-genotype".format(patient), "_")
        genotype_label = "{0} genotype".format(patient)
        genotype.addGenotype(
            intrinsic_geno_bnode, genotype_label,
            model.globaltt['intrinsic_genotype'])
        self.graph.addTriple(
            patient_curie, model.globaltt['has_genotype'],
            intrinsic_geno_bnode)

        for variant_id, variant in patient_var_map[patient].items():
            build = variant['build']
            chromosome = variant['chromosome']
            position = variant['position']
            reference_allele = variant['reference_allele']
            variant_allele = variant['variant_allele']
            genes_of_interest = variant['genes_of_interest']
            rs_id = variant['rs_id']

            variant_bnode = self.make_id("{0}".format(variant_id), "_")

            # Pick the most specific label the available fields allow.
            has_both_alleles = bool(reference_allele and variant_allele)
            if has_both_alleles \
                    and (position or len(genes_of_interest) == 1):
                # both alleles known, plus either a position or
                # exactly one gene of interest
                variant_label = self._build_variant_label(
                    build, chromosome, position, reference_allele,
                    variant_allele, genes_of_interest)
            elif position and len(genes_of_interest) == 1:
                # reaching here with a position means at least one
                # allele is missing, so only the locus can be described
                variant_label = "{0}{1}({2}):g.{3}".format(
                    build, chromosome, genes_of_interest[0], position)
            elif len(genes_of_interest) == 1:
                variant_label = 'variant of interest in {0} gene of patient' \
                                ' {1}'.format(genes_of_interest[0], patient)
            else:
                variant_label = \
                    'variant of interest in patient {0}'.format(patient)

            genotype.addSequenceAlteration(variant_bnode, None)

            # check if we have already built the label
            # in _add_variant_gene_relationship(); the graph lookup is done
            # with the '_:' prefix stripped from the node id
            # (count is passed by keyword: positional use is deprecated)
            existing_labels = self.graph.objects(
                BNode(re.sub(r'^_:', '', variant_bnode, count=1)),
                RDFS['label'])
            if not list(existing_labels):
                model.addLabel(variant_bnode, variant_label)

            self.graph.addTriple(
                variant_bnode, self.globaltt['in taxon'],
                self.globaltt['Homo sapiens'])
            self.graph.addTriple(
                intrinsic_geno_bnode, self.globaltt['has_variant_part'],
                variant_bnode)

            if rs_id:
                dbsnp_curie = 'dbSNP:{0}'.format(rs_id)
                model.addSameIndividual(variant_bnode, dbsnp_curie)

    self._add_variant_sameas_relationships(patient_var_map, rs_map)
示例3: _process_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlteration [as 别名]
#.........这里部分代码省略.........
locus_num = None
for v in variant_id.split(';'):
# handle omim-style and odd var ids
# like 610661.p.R401X
m = re.match(r'(\d+)\.+(.*)', v.strip())
if m is not None and len(m.groups()) == 2:
(locus_num, var_num) = m.groups()
if locus_num is not None \
and locus_num not in omim_map:
omim_map[locus_num] = [var_num]
else:
omim_map[locus_num] += [var_num]
for o in omim_map:
# gene_id = 'OMIM:' + o # TODO unused
vslc_id = \
'_' + '-'.join(
[o + '.' + a for a in omim_map.get(o)])
if self.nobnodes:
vslc_id = ':'+vslc_id
vslc_label = vl
# we don't really know the zygosity of
# the alleles at all.
# so the vslcs are just a pot of them
gu.addIndividualToGraph(
g, vslc_id, vslc_label,
geno.genoparts[
'variant_single_locus_complement'])
for v in omim_map.get(o):
# this is actually a sequence alt
allele1_id = 'OMIM:'+o+'.'+v
geno.addSequenceAlteration(allele1_id, None)
# assume that the sa -> var_loc -> gene
# is taken care of in OMIM
geno.addPartsToVSLC(
vslc_id, allele1_id, None,
geno.zygosity['indeterminate'],
geno.object_properties[
'has_alternate_part'])
if vslc_id != gvc_id:
geno.addVSLCtoParent(vslc_id, gvc_id)
if affected == 'unaffected':
# let's just say that this person is wildtype
gu.addType(g, patient_id, geno.genoparts['wildtype'])
elif genotype_id is None:
# make an anonymous genotype id
genotype_id = '_geno'+catalog_id.strip()
if self.nobnodes:
genotype_id = ':'+genotype_id
# add the gvc
if gvc_id is not None:
gu.addIndividualToGraph(
g, gvc_id, gvc_label,
geno.genoparts['genomic_variation_complement'])
# add the gvc to the genotype
if genotype_id is not None:
if affected == 'unaffected':
rel = \
geno.object_properties[
示例4: _process_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlteration [as 别名]
def _process_data(self, raw, limit=None):
logger.info("Processing Data from %s", raw)
gu = GraphUtils(curie_map.get())
if self.testMode:
g = self.testgraph
else:
g = self.graph
geno = Genotype(g)
line_counter = 0
gu.loadAllProperties(g)
gu.loadObjectProperties(g, geno.object_properties)
# Add the taxon as a class
taxon_id = 'NCBITaxon:10090' # map to Mus musculus
gu.addClassToGraph(g, taxon_id, None)
# with open(raw, 'r', encoding="utf8") as csvfile:
with gzip.open(raw, 'rt') as csvfile:
filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
next(filereader, None) # skip the header row
for row in filereader:
line_counter += 1
(marker_accession_id, marker_symbol, phenotyping_center,
colony, sex, zygosity, allele_accession_id, allele_symbol,
allele_name, strain_accession_id, strain_name, project_name,
project_fullname, pipeline_name, pipeline_stable_id,
procedure_stable_id, procedure_name, parameter_stable_id,
parameter_name, top_level_mp_term_id, top_level_mp_term_name,
mp_term_id, mp_term_name, p_value, percentage_change,
effect_size, statistical_method, resource_name) = row
if self.testMode and marker_accession_id not in self.test_ids:
continue
# ##### cleanup some of the identifiers ######
zygosity_id = self._map_zygosity(zygosity)
# colony ids sometimes have <> in them, spaces,
# or other non-alphanumerics and break our system;
# replace these with underscores
colony_id = '_'+re.sub(r'\W+', '_', colony)
if self.nobnodes:
colony_id = ':'+colony_id
if not re.match(r'MGI', allele_accession_id):
allele_accession_id = \
'_IMPC-'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
allele_accession_id = ':'+allele_accession_id
if re.search(r'EUROCURATE', strain_accession_id):
# the eurocurate links don't resolve at IMPC
strain_accession_id = '_'+strain_accession_id
if self.nobnodes:
strain_accession_id = ':'+strain_accession_id
elif not re.match(r'MGI', strain_accession_id):
logger.info(
"Found a strange strain accession...%s",
strain_accession_id)
strain_accession_id = 'IMPC:'+strain_accession_id
######################
# first, add the marker and variant to the graph as with MGI,
# the allele is the variant locus. IF the marker is not known,
# we will call it a sequence alteration. otherwise,
# we will create a BNode for the sequence alteration.
sequence_alteration_id = variant_locus_id = None
variant_locus_name = sequence_alteration_name = None
# extract out what's within the <> to get the symbol
if re.match(r'.*<.*>', allele_symbol):
sequence_alteration_name = \
re.match(r'.*<(.*)>', allele_symbol).group(1)
else:
sequence_alteration_name = allele_symbol
if marker_accession_id is not None and \
marker_accession_id == '':
logger.warning(
"Marker unspecified on row %d", line_counter)
marker_accession_id = None
if marker_accession_id is not None:
variant_locus_id = allele_accession_id
variant_locus_name = allele_symbol
variant_locus_type = geno.genoparts['variant_locus']
geno.addGene(marker_accession_id, marker_symbol,
geno.genoparts['gene'])
geno.addAllele(variant_locus_id, variant_locus_name,
variant_locus_type, None)
geno.addAlleleOfGene(variant_locus_id, marker_accession_id)
sequence_alteration_id = \
'_seqalt'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
sequence_alteration_id = ':'+sequence_alteration_id
geno.addSequenceAlterationToVariantLocus(
sequence_alteration_id, variant_locus_id)
#.........这里部分代码省略.........