本文整理汇总了Python中dipper.models.Genotype.Genotype.addSequenceAlterationToVariantLocus方法的典型用法代码示例。如果您正苦于以下问题：Python Genotype.addSequenceAlterationToVariantLocus方法的具体用法？Python Genotype.addSequenceAlterationToVariantLocus怎么用？Python Genotype.addSequenceAlterationToVariantLocus使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dipper.models.Genotype.Genotype的用法示例。
在下文中一共展示了Genotype.addSequenceAlterationToVariantLocus方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_allele_phenotype
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlterationToVariantLocus [as 别名]
def process_allele_phenotype(self, limit=None):
"""
This file compactly lists variant to phenotype associations,
such that in a single row, there may be >1 variant listed
per phenotype and paper. This indicates that each variant is
individually associated with the given phenotype,
as listed in 1+ papers.
(Not that the combination of variants is producing the phenotype.)
:param limit:
:return:
"""
raw = '/'.join((self.rawdir, self.files['allele_pheno']['file']))
if self.testMode:
g = self.testgraph
else:
g = self.graph
# gu = GraphUtils(curie_map.get()) # TODO unused
logger.info("Processing Allele phenotype associations")
line_counter = 0
geno = Genotype(g)
with open(raw, 'r') as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
if re.match(r'!', ''.join(row)): # header
continue
line_counter += 1
(db, gene_num, gene_symbol, is_not, phenotype_id, ref,
eco_symbol, with_or_from, aspect, gene_name, gene_synonym,
gene_class, taxon, date, assigned_by, blank, blank2) = row
if self.testMode and gene_num not in self.test_ids['gene']:
continue
# TODO add NOT phenotypes
if is_not == 'NOT':
continue
eco_id = None
if eco_symbol == 'IMP':
eco_id = 'ECO:0000015'
elif eco_symbol.strip() != '':
logger.warning(
"Encountered an ECO code we don't have: %s",
eco_symbol)
# according to the GOA spec, persons are not allowed to be
# in the reference column, therefore the variant and the
# person are swapped between the reference and with columns.
# we swap them back here.
temp_var = temp_ref = None
if re.search(r'WBVar|WBRNAi', ref):
temp_var = ref
# move the paper from the with column into the ref
if re.search(r'WBPerson', with_or_from):
temp_ref = with_or_from
if temp_var is not None or temp_ref is not None:
with_or_from = temp_var
ref = temp_ref
allele_list = re.split(r'\|', with_or_from)
if len(allele_list) == 0:
logger.error(
"Missing alleles from phenotype assoc at line %d",
line_counter)
continue
else:
for a in allele_list:
allele_num = re.sub(r'WB:', '', a.strip())
allele_id = 'WormBase:'+allele_num
gene_id = 'WormBase:'+gene_num
if re.search(r'WBRNAi', allele_id):
# make the reagent-targeted gene,
# & annotate that instead of the RNAi item directly
rnai_num = re.sub(r'WormBase:', '', allele_id)
rnai_id = allele_id
rtg_id = self.make_reagent_targeted_gene_id(
gene_num, rnai_num, self.nobnodes)
geno.addReagentTargetedGene(
rnai_id, 'WormBase:'+gene_num, rtg_id)
geno.addGeneTargetingReagent(
rnai_id, None, geno.genoparts['RNAi_reagent'],
gene_id)
allele_id = rtg_id
elif re.search(r'WBVar', allele_id):
# this may become deprecated by using wormmine
# make the allele to gene relationship
# the WBVars are really sequence alterations
# the public name will come from elsewhere
geno.addSequenceAlteration(allele_id, None)
vl_id = '_'+'-'.join((gene_num, allele_num))
if self.nobnodes:
vl_id = ':'+vl_id
geno.addSequenceAlterationToVariantLocus(
#.........这里部分代码省略.........
示例2: _get_variants
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlterationToVariantLocus [as 别名]
#.........这里部分代码省略.........
if hgvs_c != '-' and hgvs_c.strip() != '':
model.addSynonym(seqalt_id, hgvs_c)
if hgvs_p != '-' and hgvs_p.strip() != '':
model.addSynonym(seqalt_id, hgvs_p)
# add the dbsnp and dbvar ids as equivalent
if dbsnp_num != '-' and int(dbsnp_num) != -1:
dbsnp_id = 'dbSNP:rs'+str(dbsnp_num)
model.addIndividualToGraph(dbsnp_id, None)
model.addSameIndividual(seqalt_id, dbsnp_id)
if dbvar_num != '-':
dbvar_id = 'dbVar:'+dbvar_num
model.addIndividualToGraph(dbvar_id, None)
model.addSameIndividual(seqalt_id, dbvar_id)
# TODO - not sure if this is right... add as xref?
# the rcv is like the combo of the phenotype with the variant
if rcv_nums != '-':
for rcv_num in re.split(r';', rcv_nums):
rcv_id = 'ClinVar:' + rcv_num
model.addIndividualToGraph(rcv_id, None)
model.addXref(seqalt_id, rcv_id)
if gene_id is not None:
# add the gene
model.addClassToGraph(gene_id, gene_symbol)
# make a variant locus
vl_id = '_'+gene_num+'-'+variant_num
if self.nobnodes:
vl_id = ':'+vl_id
vl_label = allele_name
model.addIndividualToGraph(
vl_id, vl_label, geno.genoparts['variant_locus'])
geno.addSequenceAlterationToVariantLocus(seqalt_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
else:
# some basic reporting
gmatch = re.search(r'\(\w+\)', allele_name)
if gmatch is not None and len(gmatch.groups()) > 0:
logger.info(
"Gene found in allele label, but no id provided: %s",
gmatch.group(1))
elif re.match(r'more than 10', gene_symbol):
logger.info(
"More than 10 genes found; "
"need to process XML to fetch (variant=%d)",
int(variant_num))
else:
logger.info(
"No gene listed for variant %d",
int(variant_num))
# parse the list of "phenotypes" which are diseases.
# add them as an association
# ;GeneReviews:NBK1440,MedGen:C0392514,OMIM:235200,SNOMED CT:35400008;MedGen:C3280096,OMIM:614193;MedGen:CN034317,OMIM:612635;MedGen:CN169374
# the list is both semicolon delimited and comma delimited,
# but i don't know why! some are bad, like:
# Orphanet:ORPHA ORPHA319705,SNOMED CT:49049000
if phenotype_ids != '-':
for phenotype in pheno_list:
m = re.match(
r"(Orphanet:ORPHA(?:\s*ORPHA)?)", phenotype)
if m is not None and len(m.groups()) > 0:
phenotype = re.sub(
m.group(1), 'Orphanet:', phenotype.strip())
elif re.match(r'ORPHA:\d+', phenotype):
示例3: _process_data
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlterationToVariantLocus [as 别名]
def _process_data(self, raw, limit=None):
logger.info("Processing Data from %s", raw)
gu = GraphUtils(curie_map.get())
if self.testMode:
g = self.testgraph
else:
g = self.graph
geno = Genotype(g)
line_counter = 0
gu.loadAllProperties(g)
gu.loadObjectProperties(g, geno.object_properties)
# Add the taxon as a class
taxon_id = 'NCBITaxon:10090' # map to Mus musculus
gu.addClassToGraph(g, taxon_id, None)
# with open(raw, 'r', encoding="utf8") as csvfile:
with gzip.open(raw, 'rt') as csvfile:
filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
next(filereader, None) # skip the header row
for row in filereader:
line_counter += 1
(marker_accession_id, marker_symbol, phenotyping_center,
colony, sex, zygosity, allele_accession_id, allele_symbol,
allele_name, strain_accession_id, strain_name, project_name,
project_fullname, pipeline_name, pipeline_stable_id,
procedure_stable_id, procedure_name, parameter_stable_id,
parameter_name, top_level_mp_term_id, top_level_mp_term_name,
mp_term_id, mp_term_name, p_value, percentage_change,
effect_size, statistical_method, resource_name) = row
if self.testMode and marker_accession_id not in self.test_ids:
continue
# ##### cleanup some of the identifiers ######
zygosity_id = self._map_zygosity(zygosity)
# colony ids sometimes have <> in them, spaces,
# or other non-alphanumerics and break our system;
# replace these with underscores
colony_id = '_'+re.sub(r'\W+', '_', colony)
if self.nobnodes:
colony_id = ':'+colony_id
if not re.match(r'MGI', allele_accession_id):
allele_accession_id = \
'_IMPC-'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
allele_accession_id = ':'+allele_accession_id
if re.search(r'EUROCURATE', strain_accession_id):
# the eurocurate links don't resolve at IMPC
strain_accession_id = '_'+strain_accession_id
if self.nobnodes:
strain_accession_id = ':'+strain_accession_id
elif not re.match(r'MGI', strain_accession_id):
logger.info(
"Found a strange strain accession...%s",
strain_accession_id)
strain_accession_id = 'IMPC:'+strain_accession_id
######################
# first, add the marker and variant to the graph as with MGI,
# the allele is the variant locus. IF the marker is not known,
# we will call it a sequence alteration. otherwise,
# we will create a BNode for the sequence alteration.
sequence_alteration_id = variant_locus_id = None
variant_locus_name = sequence_alteration_name = None
# extract out what's within the <> to get the symbol
if re.match(r'.*<.*>', allele_symbol):
sequence_alteration_name = \
re.match(r'.*<(.*)>', allele_symbol).group(1)
else:
sequence_alteration_name = allele_symbol
if marker_accession_id is not None and \
marker_accession_id == '':
logger.warning(
"Marker unspecified on row %d", line_counter)
marker_accession_id = None
if marker_accession_id is not None:
variant_locus_id = allele_accession_id
variant_locus_name = allele_symbol
variant_locus_type = geno.genoparts['variant_locus']
geno.addGene(marker_accession_id, marker_symbol,
geno.genoparts['gene'])
geno.addAllele(variant_locus_id, variant_locus_name,
variant_locus_type, None)
geno.addAlleleOfGene(variant_locus_id, marker_accession_id)
sequence_alteration_id = \
'_seqalt'+re.sub(r':', '', allele_accession_id)
if self.nobnodes:
sequence_alteration_id = ':'+sequence_alteration_id
geno.addSequenceAlterationToVariantLocus(
sequence_alteration_id, variant_locus_id)
#.........这里部分代码省略.........
示例4: _process_qtls_genetic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlterationToVariantLocus [as 别名]
#.........这里部分代码省略.........
gene_id = gene_id.replace('uncharacterized ', '').strip()
if gene_id is not None and gene_id != '' and gene_id != '.'\
and re.fullmatch(r'[^ ]*', gene_id) is not None:
# we assume if no src is provided and gene_id is an integer,
# then it is an NCBI gene ... (okay, lets crank that back a notch)
if gene_id_src == '' and gene_id.isdigit() and \
gene_id in self.gene_info:
# LOG.info(
# 'Warm & Fuzzy saying %s is a NCBI gene for %s',
# gene_id, common_name)
gene_id_src = 'NCBIgene'
elif gene_id_src == '' and gene_id.isdigit():
LOG.warning(
'Cold & Prickely saying %s is a NCBI gene for %s',
gene_id, common_name)
gene_id_src = 'NCBIgene'
elif gene_id_src == '':
LOG.error(
' "%s" is a NOT NCBI gene for %s', gene_id, common_name)
gene_id_src = None
if gene_id_src == 'NCBIgene':
gene_id = 'NCBIGene:' + gene_id
# we will expect that these will get labels elsewhere
geno.addGene(gene_id, None)
# FIXME what is the right relationship here?
geno.addAffectedLocus(qtl_id, gene_id)
if dbsnp_id is not None:
# add the rsid as a seq alt of the gene_id
vl_id = '_:' + re.sub(
r':', '', gene_id) + '-' + peak_mark.strip()
geno.addSequenceAlterationToVariantLocus(
dbsnp_id, vl_id)
geno.addAffectedLocus(vl_id, gene_id)
# add the trait
model.addClassToGraph(trait_id, trait_name)
# Add publication
reference = None
if re.match(r'ISU.*', pubmed_id):
pub_id = 'AQTLPub:'+pubmed_id.strip()
reference = Reference(graph, pub_id)
elif pubmed_id != '':
pub_id = 'PMID:' + pubmed_id.strip()
reference = Reference(
graph, pub_id, self.globaltt['journal article'])
if reference is not None:
reference.addRefToGraph()
# make the association to the QTL
assoc = G2PAssoc(
graph, self.name, qtl_id, trait_id, self.globaltt['is marker for'])
assoc.add_evidence(eco_id)
assoc.add_source(pub_id)
# create a description from the contents of the file
# desc = ''
# assoc.addDescription(g, assoc_id, desc)
# TODO add exp_id as evidence
# if exp_id != '':
示例5: _process_QTLs_genetic_location
# 需要导入模块: from dipper.models.Genotype import Genotype [as 别名]
# 或者: from dipper.models.Genotype.Genotype import addSequenceAlterationToVariantLocus [as 别名]
def _process_QTLs_genetic_location(self, raw, taxon_id, common_name, limit=None):
"""
This function processes
Triples created:
:param limit:
:return:
"""
if self.testMode:
g = self.testgraph
else:
g = self.graph
line_counter = 0
geno = Genotype(g)
gu = GraphUtils(curie_map.get())
eco_id = "ECO:0000061" # Quantitative Trait Analysis Evidence
logger.info("Processing genetic location for %s", taxon_id)
with open(raw, 'r', encoding="iso-8859-1") as csvfile:
filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
for row in filereader:
line_counter += 1
(qtl_id, qtl_symbol, trait_name, assotype, empty, chromosome, position_cm, range_cm,
flankmark_a2, flankmark_a1, peak_mark, flankmark_b1, flankmark_b2, exp_id, model, test_base,
sig_level, lod_score, ls_mean, p_values, f_statistics, variance, bayes_value, likelihood_ratio,
trait_id, dom_effect, add_effect, pubmed_id, gene_id, gene_id_src, gene_id_type, empty2) = row
if self.testMode and int(qtl_id) not in self.test_ids:
continue
qtl_id = 'AQTL:'+qtl_id
trait_id = 'AQTLTrait:'+trait_id
# Add QTL to graph
f = Feature(qtl_id, qtl_symbol, geno.genoparts['QTL'])
f.addTaxonToFeature(g, taxon_id)
# deal with the chromosome
chrom_id = makeChromID(chromosome, taxon_id, 'CHR')
# add a version of the chromosome which is defined as the genetic map
build_id = 'MONARCH:'+common_name.strip()+'-linkage'
build_label = common_name+' genetic map'
geno.addReferenceGenome(build_id, build_label, taxon_id)
chrom_in_build_id = makeChromID(chromosome, build_id, 'MONARCH')
geno.addChromosomeInstance(chromosome, build_id, build_label, chrom_id)
start = stop = None
if re.search('-', range_cm):
range_parts = re.split('-', range_cm)
# check for poorly formed ranges
if len(range_parts) == 2 and range_parts[0] != '' and range_parts[1] != '':
(start, stop) = [int(float(x.strip())) for x in re.split('-', range_cm)]
else:
logger.info("There's a cM range we can't handle for QTL %s: %s", qtl_id, range_cm)
elif position_cm != '':
start = stop = int(float(position_cm))
# FIXME remove conversion to int for start/stop when schema can handle floats
# add in the genetic location based on the range
f.addFeatureStartLocation(start, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureEndLocation(stop, chrom_in_build_id, None, [Feature.types['FuzzyPosition']])
f.addFeatureToGraph(g)
# sometimes there's a peak marker, like a rsid. we want to add that as a variant of the gene,
# and xref it to the qtl.
dbsnp_id = None
if peak_mark != '' and peak_mark != '.' and re.match('rs', peak_mark.strip()):
dbsnp_id = 'dbSNP:'+peak_mark.strip()
gu.addIndividualToGraph(g, dbsnp_id, None, geno.genoparts['sequence_alteration'])
gu.addXref(g, qtl_id, dbsnp_id)
if gene_id is not None and gene_id != '' and gene_id != '.':
if gene_id_src == 'NCBIgene' or gene_id_src == '': # we assume if no src is provided, it's NCBI
gene_id = 'NCBIGene:'+gene_id.strip()
geno.addGene(gene_id, None) # we will expect that these labels provided elsewhere
geno.addAlleleOfGene(qtl_id, gene_id, geno.object_properties['feature_to_gene_relation']) # FIXME what is the right relationship here?
if dbsnp_id is not None:
# add the rsid as a seq alt of the gene_id
vl_id = '_' + re.sub(':', '', gene_id) + '-' + peak_mark
if self.nobnodes:
vl_id = ':' + vl_id
geno.addSequenceAlterationToVariantLocus(dbsnp_id, vl_id)
geno.addAlleleOfGene(vl_id, gene_id)
# add the trait
gu.addClassToGraph(g, trait_id, trait_name)
# Add publication
r = None
if re.match('ISU.*', pubmed_id):
pub_id = 'AQTLPub:'+pubmed_id.strip()
r = Reference(pub_id)
elif pubmed_id != '':
pub_id = 'PMID:'+pubmed_id.strip()
r = Reference(pub_id, Reference.ref_types['journal_article'])
if r is not None:
#.........这里部分代码省略.........