本文整理汇总了 Python 中 oncotator.TranscriptProviderUtils.TranscriptProviderUtils 类的典型用法代码示例。如果您正苦于以下问题：Python TranscriptProviderUtils 类的具体用法？Python TranscriptProviderUtils 怎么用？Python TranscriptProviderUtils 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
下文共展示了 TranscriptProviderUtils 类的 15 个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于系统推荐出更好的 Python 代码示例。
示例1: annotate_mutation
def annotate_mutation(self, mutation):
    """Add transcript-derived annotations to ``mutation`` and return it.

    The transcripts overlapping the mutation are fetched with
    ``self.get_transcripts_by_pos``. When none come back the mutation is
    classified as IGR and only the nearest-gene summary, placeholder gene
    fields and the genome change are filled in on top of the blank
    annotation set. Otherwise a single transcript is selected with
    ``self._choose_transcript`` and its classification, change strings,
    gene fields and GENCODE tag values are recorded. HGVS annotations are
    added afterwards in both cases (with ``chosen_tx`` left as None for IGR).

    :param mutation: object exposing chr, start, end, ref_allele, alt_allele
        and addAnnotations()
    :return: the same mutation object, after the annotations were added
    """
    annotate = self._create_basic_annotation
    chrom = mutation.chr
    start = int(mutation.start)
    end = int(mutation.end)
    txs = self.get_transcripts_by_pos(chrom, start, end)

    annotations = self._create_blank_set_of_annotations()
    annotations['variant_type'] = Annotation(
        value=TranscriptProviderUtils.infer_variant_type(mutation.ref_allele, mutation.alt_allele),
        datasourceName=self.title)
    variant_type = annotations['variant_type'].value
    chosen_tx = None

    if len(txs) == 0:
        # We have hit IGR if no transcripts come back. Most annotations can just use the blank set.
        annotations['variant_classification'] = annotate(VariantClassification.IGR)
        nearest_genes = self._get_nearest_genes(chrom, start, end)
        upstream = nearest_genes[0]
        downstream = nearest_genes[1]
        annotations['other_transcripts'] = annotate(
            value='%s (%s upstream) : %s (%s downstream)' % (upstream[0], upstream[1], downstream[0], downstream[1]))
        annotations['gene'] = annotate('Unknown')
        annotations['gene_id'] = annotate('0')
        annotations['genome_change'] = annotate(TranscriptProviderUtils.determine_genome_change(
            mutation.chr, mutation.start, mutation.end, mutation.ref_allele, mutation.alt_allele, variant_type))
    else:
        # Choose the best effect transcript
        chosen_tx = self._choose_transcript(
            txs, self.get_tx_mode(), variant_type, mutation.ref_allele, mutation.alt_allele, start, end)
        vcer = VariantClassifier()

        annotations['annotation_transcript'] = annotate(chosen_tx.get_transcript_id())
        annotations['genome_change'] = annotate(TranscriptProviderUtils.determine_genome_change(
            mutation.chr, mutation.start, mutation.end, mutation.ref_allele, mutation.alt_allele, variant_type))
        annotations['strand'] = annotate(chosen_tx.get_strand())
        annotations['transcript_position'] = annotate(
            TranscriptProviderUtils.render_transcript_position(start, end, chosen_tx))
        annotations['transcript_id'] = annotate(chosen_tx.get_transcript_id())

        classification = vcer.variant_classify(
            tx=chosen_tx, variant_type=variant_type,
            ref_allele=mutation.ref_allele, alt_allele=mutation.alt_allele,
            start=mutation.start, end=mutation.end)
        # get_exon_i() is reported with +1, i.e. shifted by one for display.
        annotations['transcript_exon'] = annotate(str(classification.get_exon_i() + 1))
        annotations['variant_classification'] = annotate(classification.get_vc())
        annotations['secondary_variant_classification'] = annotate(classification.get_secondary_vc())
        annotations['protein_change'] = annotate(vcer.generate_protein_change_from_vc(classification))
        annotations['codon_change'] = annotate(
            vcer.generate_codon_change_from_vc(chosen_tx, start, end, classification))
        annotations['transcript_change'] = annotate(vcer.generate_transcript_change_from_tx(
            chosen_tx, variant_type, classification, start, end, mutation.ref_allele, mutation.alt_allele))
        annotations['transcript_strand'] = annotate(chosen_tx.get_strand())
        annotations['gene'] = annotate(chosen_tx.get_gene())
        annotations['gene_type'] = annotate(chosen_tx.get_gene_type())

        # Annotation name -> key handed to _retrieve_gencode_tag_value, in the order they are written.
        gencode_fields = (
            ('gencode_transcript_tags', 'tag'),
            ('gencode_transcript_status', 'transcript_status'),
            ('havana_transcript', 'havana_transcript'),
            ('ccds_id', 'ccdsid'),
            ('gencode_transcript_type', 'transcript_type'),
            ('gencode_transcript_name', 'transcript_name'),
        )
        for annotation_name, tag_key in gencode_fields:
            annotations[annotation_name] = annotate(self._retrieve_gencode_tag_value(chosen_tx, tag_key))

        other_transcript_value = self._render_other_transcripts(
            txs, [txs.index(chosen_tx)], variant_type,
            mutation.ref_allele, mutation.alt_allele, mutation.start, mutation.end)
        annotations['other_transcripts'] = annotate(other_transcript_value)

    mutation.addAnnotations(annotations)

    # Add the HGVS annotations ... setting to "" if not available.
    mutation.addAnnotations(self._create_hgvs_annotation_dict(mutation, chosen_tx))
    return mutation
示例2: generate_transcript_change_from_tx
def generate_transcript_change_from_tx(self, tx, variant_type, vc, start_genomic_space, end_genomic_space, ref_allele, alt_allele):
    """Build the transcript change string for a classified variant.

    :param tx: transcript the variant was classified against
    :param variant_type: variant type handed through to the renderer
    :param vc: classification result providing get_vc(), get_secondary_vc()
        and get_cds_start_in_exon_space()
    :param start_genomic_space: genomic start (converted with int())
    :param end_genomic_space: genomic end (converted with int())
    :param ref_allele: reference allele, not yet stranded
    :param alt_allele: alternate allele, not yet stranded
    :return: "" for an intronic splice site or when the CDS start is "" or
        negative; otherwise the value of
        TranscriptProviderUtils.render_transcript_change
    """
    primary_vc = vc.get_vc()
    if primary_vc == VariantClassification.SPLICE_SITE and vc.get_secondary_vc() == VariantClassification.INTRON:
        # Intronic splice sites get no transcript change. (A splice-site
        # specific rendering used to be sketched here but was disabled.)
        return ""

    cds_start = vc.get_cds_start_in_exon_space()
    # NOTE(review): if cds_start can be a non-empty string, the "< 0" test
    # raises TypeError on Python 3 -- confirm the getter's return type.
    if cds_start == "" or cds_start < 0:
        return ""

    exon_start, exon_end = TranscriptProviderUtils.convert_genomic_space_to_exon_space(
        int(start_genomic_space), int(end_genomic_space), tx)

    # Transcripts on the "-" strand get one extra position added.
    strand_shift = 1 if tx.get_strand() == "-" else 0
    cds_offset = int(cds_start)
    cds_position_start = exon_start - cds_offset + strand_shift
    cds_position_end = exon_end - cds_offset + strand_shift

    observed_stranded, reference_stranded = self._get_stranded_alleles(ref_allele, alt_allele, tx)
    return TranscriptProviderUtils.render_transcript_change(
        variant_type, vc.get_vc(), cds_position_start, cds_position_end,
        reference_stranded, observed_stranded, vc.get_secondary_vc())
示例3: _add
def _add(self, mutation):
    """Queue ``mutation`` according to its inferred variant type.

    xNP variants are appended to the queue of the sample they belong to
    (so they can be combined later); everything else, i.e. indels, goes to
    ``self.indel_queue``.
    """
    inferred_type = TranscriptProviderUtils.infer_variant_type(mutation.ref_allele, mutation.alt_allele)
    if TranscriptProviderUtils.is_xnp(inferred_type):
        sample_name = self.sns.getSampleName(mutation)
        self.queue[sample_name].append(mutation)
    else:
        # only combine ONPs, not indels
        self.indel_queue.append(mutation)
示例4: generate_codon_change_from_vc
def generate_codon_change_from_vc(self, t, start, end, vc):
    """Build the codon change string for a classified variant.

    :param t: (Transcript)
    :param start: (int)
    :param end: (int)
    :param vc: (VariantClassification)
    :return: the intronic splice-site rendering when vc is a splice site
        with secondary classification intron; "" when either reference
        codon boundary is ""; otherwise the value of
        TranscriptProviderUtils.render_codon_change
    """
    exon_i = vc.get_exon_i()
    dist_from_exon = self._get_splice_site_coordinates(t, start, end, exon_i)

    if vc.get_vc() == VariantClassification.SPLICE_SITE and vc.get_secondary_vc() == VariantClassification.INTRON:
        return TranscriptProviderUtils.render_intronic_splice_site_codon_change(dist_from_exon, exon_i)

    ref_codon_start = vc.get_ref_codon_start_in_exon()
    if ref_codon_start == "":
        return ""
    ref_codon_end = vc.get_ref_codon_end_in_exon()
    if ref_codon_end == "":
        return ""

    # Shift the codon boundaries so they are relative to the CDS start (+1).
    codon_start_cds_space = int(ref_codon_start) - int(vc.get_cds_start_in_exon_space()) + 1
    codon_end_cds_space = int(ref_codon_end) - int(vc.get_cds_start_in_exon_space()) + 1

    ref_codon_seq = vc.get_ref_codon()
    alt_codon_seq = vc.get_alt_codon()
    return TranscriptProviderUtils.render_codon_change(
        vc.get_vt(), vc.get_vc(), codon_start_cds_space, codon_end_cds_space,
        ref_codon_seq, alt_codon_seq, dist_from_exon, exon_i, vc.get_secondary_vc())
示例5: initializeMutFromAttributes
def initializeMutFromAttributes(chr, start, end, ref_allele, alt_allele, build, mutation_data_factory=None):
    """Create a mutation from raw attributes and normalise indel alleles.

    The mutation is built through ``mutation_data_factory`` (a fresh
    MutationDataFactory when None is given) with chr, start, end and build
    converted to str. Depending on the inferred variant type:

    * xNP: the preceding-bases annotation is created with value "".
    * deletion / insertion: the alleles and coordinates are replaced by the
      values returned from the matching MutUtils.retrievePrecedingBasesFor*
      helper (the absent allele becomes "-"), and the preceding bases are
      stored as an annotation.

    :return: the created (and possibly updated) mutation
    """
    if mutation_data_factory is None:
        mutation_data_factory = MutationDataFactory()
    mut = mutation_data_factory.create(str(chr), str(start), str(end), ref_allele, alt_allele, str(build))
    var_type = TranscriptProviderUtils.infer_variant_type(mut.ref_allele, mut.alt_allele)

    if TranscriptProviderUtils.is_xnp(var_type):  # Snps and other xNPs
        mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME, annotationValue="")

    if var_type == VariantClassification.VT_DEL:  # deletion
        preceding_bases, new_ref, new_start, new_end = MutUtils.retrievePrecedingBasesForDeletions(mut)
        updates = (("ref_allele", new_ref), ("alt_allele", "-"), ("start", new_start), ("end", new_end))
    elif var_type == VariantClassification.VT_INS:  # insertion
        preceding_bases, new_alt, new_start, new_end = MutUtils.retrievePrecedingBasesForInsertions(mut)
        updates = (("ref_allele", "-"), ("alt_allele", new_alt), ("start", new_start), ("end", new_end))
    else:
        return mut

    # Each field is written both as an attribute and as an item, in this order.
    for field_name, field_value in updates:
        setattr(mut, field_name, field_value)
        mut[field_name] = field_value
    mut.createAnnotation(annotationName=MutUtils.PRECEDING_BASES_ANNOTATION_NAME,
                         annotationValue=preceding_bases)
    return mut
示例6: _determine_codon_overlap
def _determine_codon_overlap(self, s, e, codon_tuple, variant_type):
if codon_tuple is None:
return False
if variant_type == VariantClassification.VT_INS:
is_codon_overlap = TranscriptProviderUtils.test_overlap(s, s, codon_tuple[0]+1, codon_tuple[1])
else:
is_codon_overlap = TranscriptProviderUtils.test_overlap(s, e, codon_tuple[0]+1, codon_tuple[1])
return is_codon_overlap
示例7: test_codon_single_base
def test_codon_single_base(self, start, end, ref_base_stranded, gt_codon):
    """Check that the codon sequence found for a genomic position on the MAPK1 test transcript equals gt_codon."""
    tx = self.retrieve_test_transcript_MAPK1()
    exon_space_start, exon_space_end = TranscriptProviderUtils.convert_genomic_space_to_exon_space(start, end, tx)
    cds_start, cds_stop = TranscriptProviderUtils.determine_cds_in_exon_space(tx)
    protein_start, protein_end = TranscriptProviderUtils.get_protein_positions(
        exon_space_start, exon_space_end, cds_start)
    codon_first, codon_last = TranscriptProviderUtils.get_cds_codon_positions(
        protein_start, protein_end, cds_start)

    # codon_last is inclusive, hence the +1 on the slice bound.
    codon_seq = tx.get_seq()[codon_first:codon_last + 1]
    failure_message = "Did not get correct codon (%s): %s loc: %s-%s" % (gt_codon, codon_seq, start, end)
    self.assertTrue(codon_seq == gt_codon, failure_message)
示例8: _determine_de_novo
def _determine_de_novo(self, vc_str, exon_start, ref, alt, tx, variant_type, buffer=2):
    """Upgrade a 5'UTR classification to a de novo start when the variant creates an ATG.

    The input classification is returned unchanged unless it is 5'UTR,
    ref differs from alt, and the mutated region contains 'ATG'.

    :param vc_str: current variant classification
    :param exon_start: position of the variant in exon space
    :param ref: (str) Does not take into account strandedness (e.g. m.ref_allele)
    :param alt: (str) Does not take into account strandedness (e.g. m.alt_allele)
    :param tx: transcript
    :param variant_type: variant type, passed through to self._mutate_exon
    :param buffer: passed to self._mutate_exon and subtracted again when the
        ATG offset is mapped back to exon space
    :return: vc_str, or 'De_novo_Start_InFrame' / 'De_novo_Start_OutOfFrame'
    """
    if not (vc_str == VariantClassification.FIVE_PRIME_UTR and ref != alt):
        return vc_str

    mutated_utr_region = self._mutate_exon(tx, ref, alt, variant_type, exon_start, buffer)
    atg_offset = mutated_utr_region.find('ATG')
    if atg_offset <= -1:
        # No start codon in the mutated region.
        return vc_str

    atg_exon_position = exon_start + atg_offset - buffer
    cds_start, cds_end = TranscriptProviderUtils.determine_cds_in_exon_space(tx)
    # In frame when the distance to the CDS start is a multiple of three.
    in_frame = (cds_start - atg_exon_position) % 3 == 0
    return 'De_novo_Start_' + ('InFrame' if in_frame else 'OutOfFrame')
示例9: _choose_best_effect_transcript
def _choose_best_effect_transcript(self, txs, variant_type, ref_allele, alt_allele, start, end):
    """Pick the transcript on which the variant has the most detrimental effect.

    Each transcript's variant classification is scored through the effect
    dictionary from TranscriptProviderUtils (lower score wins; unlisted
    classifications score 25). A variant whose ref and alt alleles are both
    empty or "-" is treated as SILENT without classifying. On equal scores
    the transcript with the longer get_seq() is preferred.

    :param list txs: list of Transcript
    :param str variant_type:
    :param str ref_allele:
    :param str alt_allele:
    :param str start:
    :param str end:
    :return Transcript: best transcript, or None when txs is empty
    """
    vcer = VariantClassifier()
    effect_dict = TranscriptProviderUtils.retrieve_effect_dict()
    missing_allele = ("", "-")

    winner = None
    winner_score = 100000000  # lower score is more likely to get picked
    for candidate in txs:
        if ref_allele in missing_allele and alt_allele in missing_allele:
            vc = VariantClassification.SILENT
        else:
            vc = vcer.variant_classify(candidate, ref_allele, alt_allele, start, end, variant_type).get_vc()
        score = effect_dict.get(vc, 25)

        # The length comparison is only evaluated on a tie, never for a strictly better score.
        if score < winner_score or (
                score == winner_score and len(winner.get_seq()) < len(candidate.get_seq())):
            winner_score = score
            winner = candidate
    return winner
示例10: _is_matching
def _is_matching(self, mut, tsv_record):
chrom = tsv_record[self.tsv_index["chrom"]]
startPos = tsv_record[self.tsv_index["start"]]
endPos = tsv_record[self.tsv_index["end"]]
build = "hg19"
if self.match_mode == "exact":
if "ref" in self.tsv_index and "alt" in self.tsv_index: # ref and alt information is present
ref = tsv_record[self.tsv_index["ref"]]
alt = tsv_record[self.tsv_index["alt"]]
if ref == "-" or alt == "-": # addresses Mutation Annotation Format based tsv records
# TODO: This looks risky to be calling the MutationData constructor directly
ds_mut = MutationData(chrom, startPos, endPos, ref, alt, build)
else: # addresses tsv records where the input isn't a Mutation Annotation Format file
ds_mut = MutUtils.initializeMutFromAttributes(chrom, startPos, endPos, ref, alt, build)
if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
and int(mut.end) == int(ds_mut.end):
return True
else: # do not use ref and alt information
if mut.chr == chrom and int(mut.start) == int(startPos) and int(mut.end) == int(endPos):
return True
else:
return TranscriptProviderUtils.test_overlap(int(mut.start), int(mut.end), int(startPos), int(endPos))
return False
示例11: test_seq
def test_seq(self, start, end, gt):
    """Check that the MAPK1 test transcript sequence for a genomic range equals gt."""
    tx = self.retrieve_test_transcript_MAPK1()
    first, last = TranscriptProviderUtils.convert_genomic_space_to_exon_space(start, end, tx)
    # The converted end position is inclusive, hence the +1 on the slice bound.
    seq = tx.get_seq()[first:last + 1]
    self.assertTrue(seq == gt, "Incorrect seq found guess,gt (%s, %s)" % (seq, gt))
示例12: _calculate_protein_sequence
def _calculate_protein_sequence(self, exons, seq, cds_start_genomic_space, cds_stop_genomic_space, strand):
    """Translate the coding part of ``seq`` and drop a trailing '*'.

    The genomic CDS bounds are converted to positions within ``seq`` via
    TranscriptProviderUtils._convert_genomic_space_to_feature_space; the
    slice between them is translated with MutUtils.translate_sequence.

    :return: translated sequence without a final '*' character
    """
    cds_bounds = TranscriptProviderUtils._convert_genomic_space_to_feature_space(
        int(cds_start_genomic_space), int(cds_stop_genomic_space), exons, strand)
    cds_start_exon_space, cds_stop_exon_space = cds_bounds
    coding_seq = seq[int(cds_start_exon_space):int(cds_stop_exon_space)]
    protein = MutUtils.translate_sequence(coding_seq)
    ends_with_stop = len(protein) > 0 and protein[-1] == '*'
    return protein[:-1] if ends_with_stop else protein
示例13: test_mutate_reference_seqeunce
def test_mutate_reference_seqeunce(self, vt, start, end, ref, alt, start_exon_space, end_exon_space, mutated_seq_gt):
    """Test that we can render a mutated sequence with SNP, INS, and DEL."""
    # mutated_seq_gt is stranded and this is a "-" transcript
    tx = self.retrieve_test_transcript_MAPK1()
    observed_allele = Bio.Seq.reverse_complement(alt)
    reference_window = tx.get_seq()[start_exon_space:end_exon_space + 1]
    mutated_allele = TranscriptProviderUtils.mutate_reference_sequence(
        reference_window, start_exon_space, start_exon_space, end_exon_space, observed_allele, vt)

    case_description = str([vt, start, end, ref, alt, start_exon_space, end_exon_space, mutated_seq_gt])
    self.assertTrue(mutated_seq_gt == mutated_allele,
                    "No match (gt/guess) %s/%s for %s." % (mutated_seq_gt, mutated_allele, case_description))
示例14: test_convert_genomic_space_to_transcript_space
def test_convert_genomic_space_to_transcript_space(self):
    """Check genomic-to-transcript coordinate conversion on the saccer ensembl test datasource."""
    config_dir = "testdata/ensembl/saccer/"
    ensembl_ds = DatasourceFactory.createDatasource(config_dir + "ensembl.config", config_dir)

    # transcript starts at 335.
    chr_one_txs = ensembl_ds.get_overlapping_transcripts("I", "350", "350")
    start, end = TranscriptProviderUtils.convert_genomic_space_to_transcript_space("350", "350", chr_one_txs[0])
    self.assertTrue(start == end)
    self.assertTrue(start == 16)

    # transcript starts at 764697 (strand is '-').
    chr_two_txs = ensembl_ds.get_overlapping_transcripts("II", "764690", "764690")
    minus_tx = chr_two_txs[0]
    start, end = TranscriptProviderUtils.convert_genomic_space_to_transcript_space("764690", "764690", minus_tx)
    self.assertTrue(start == end)
    self.assertTrue(start == 7)

    # An 11-base genomic range on the same transcript: start stays at 7, end is 10 further.
    start, end = TranscriptProviderUtils.convert_genomic_space_to_transcript_space("764680", "764690", minus_tx)
    self.assertTrue(start == (end - 10))
    self.assertTrue(start == 7)
示例15: _calculate_effect_score
def _calculate_effect_score(tx, start, end, alt_allele, ref_allele, variant_type):
    """Return the effect score of the variant on ``tx``.

    The variant classification is looked up in the effect dictionary from
    TranscriptProviderUtils; classifications missing from it score 25. When
    both alleles are empty or "-" the variant is scored as SILENT without
    running the classifier.
    """
    effect_dict = TranscriptProviderUtils.retrieve_effect_dict()
    vcer = VariantClassifier()
    missing_allele = ("", "-")
    if ref_allele in missing_allele and alt_allele in missing_allele:
        vc = VariantClassification.SILENT
    else:
        classification = vcer.variant_classify(tx, ref_allele, alt_allele, start, end, variant_type)
        vc = classification.get_vc()
    return effect_dict.get(vc, 25)