本文整理汇总了Python中oncotator.utils.MutUtils.MutUtils类的典型用法代码示例。如果您正苦于以下问题：Python MutUtils类的具体用法？Python MutUtils怎么用？Python MutUtils使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了MutUtils类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_build_ensembl_transcript_index
def test_build_ensembl_transcript_index(self):
    """Build the gtf portion of the ensembl transcript db and spot-check one entry.

    The index is written to ``out/test_ensembl_gtf.db`` using the ``file``
    protocol, reopened through Shove, and the ``YDR529C`` transcript is
    checked for a non-empty sequence and non-empty CDS and exon lists.
    The output is removed once all checks have passed.
    """
    # How the trimmed test inputs were produced:
    # cat ~/oncotator_pycharm/oncotator/test/testdata/Saccharomyces_cerevisiae.EF4.71_trim.gtf | cut -f 9 | cut -f 5 --delimiter=" " | sort | uniq | sed -r "s/;//g" | sed -r "s/\"//g"
    # snR84, tK(UUU)K, YAL067C, YAL067W-A, YAL068C, YAL068W-A, YAL069W, YBR278W, YBR279W, YBR280C, YBR281C, YDR528W, YDR529C, YKR074W,
    #
    # grep -Pzo  ">(snR84|tK\(UUU\)K|YAL067C|YAL067W-A|YAL068C|YAL068W-A|YAL069W|YBR278W|YBR279W|YBR280C|YBR281C|YDR528W|YDR529C|YKR074W)([A-Za-z_0-9 \:\-\n]+)" Saccharomyces_cerevisiae.EF4.71.cdna.all.fa >Saccharomyces_cerevisiae.EF4.71_trim.cdna.all.fa
    #
    ensembl_input_gtf = "testdata/Saccharomyces_cerevisiae.EF4.71_trim.gtf"
    ensembl_input_fasta = "testdata/Saccharomyces_cerevisiae.EF4.71_trim.cdna.all.fa"
    output_filename = "out/test_ensembl_gtf.db"
    protocol = "file"

    genome_build_factory = GenomeBuildFactory()
    genome_build_factory.build_ensembl_transcript_index([ensembl_input_gtf], [ensembl_input_fasta], output_filename, protocol=protocol)
    self.assertTrue(os.path.exists(output_filename))

    shove = Shove(protocol + "://" + output_filename, "memory://")
    self.assertTrue(len(shove.keys()) > 0)
    self.assertIn("YDR529C", shove.keys())

    t = shove["YDR529C"]
    self.assertIsNotNone(t.get_seq())
    # Compare by value: an identity test against a string literal ("is not")
    # does not detect an empty sequence.
    self.assertNotEqual(t.get_seq(), "")
    self.assertTrue(len(t.get_cds()) > 0)
    self.assertTrue(len(t.get_exons()) > 0)

    MutUtils.removeDir(output_filename)
示例2: testChromosomeConversionHG19
def testChromosomeConversionHG19(self):
    """Check that hg19 chromosomes 23 and 24 convert to X and Y, and that 2 and 4 stay as they are."""
    convert = MutUtils.convertChromosomeStringToMutationDataFormat
    # (input chromosome, expected result, failure-message prefix)
    cases = (
        ("23", "X", "chrom of 23 did not produce X: "),
        ("24", "Y", "chrom of 24 did not produce Y: "),
        ("2", "2", "chrom of 2 yielded different value: "),
        ("4", "4", "chrom of 4 yielded different value: "),
    )
    for chrom, expected, prefix in cases:
        self.assertEqual(convert(chrom, build="hg19"), expected, prefix + convert(chrom, build="hg19"))
示例3: testSimpleRead
def testSimpleRead(self):
    """Read a good maflite file and validate every mutation it yields."""
    creator = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt", 'configs/maflite_input.config')
    # The test passes as long as validation never raises.
    for mutation in creator.createMutations():
        MutUtils.validateMutation(mutation)
示例4: testExampleVcfDBAnnotationWithSNPExactMatch
def testExampleVcfDBAnnotationWithSNPExactMatch(self):
    """Annotate two hg19 SNPs on chromosome 20 with the ``vcf_db_exact`` test datasource.

    Selected ``ESP_*`` annotations of each annotated mutation are compared
    against hand-built ``Annotation`` objects via ``isEqual``.
    """
    dsDir = os.path.join("testdata", "vcf_db_exact", "hg19")
    datasource = DatasourceFactory.createDatasource(os.path.join(dsDir, "vcf_db_exact.config"), dsDir)

    def annotate_snp(position, ref, alt):
        # Start and end are the same position for both mutations in this test.
        mutation = MutUtils.initializeMutFromAttributes("20", position, position, ref, alt, "hg19")
        return datasource.annotate_mutation(mutation)

    def check(annotated, name, expected):
        self.assertTrue(annotated.getAnnotation(name).isEqual(expected), "Annotations do not match.")

    # First SNP: 20:1110696 A>T
    first = annotate_snp("1110696", "A", "T")
    check(first, "ESP_AF",
          Annotation(value="0.667", datasourceName="ESP", dataType="Float",
                     description="Allele Frequency", tags=[TagConstants.INFO, TagConstants.SPLIT],
                     number=-1))
    check(first, "ESP_AC",
          Annotation(value="2,4", datasourceName="ESP", dataType="Integer",
                     description="Allele Count", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                     number=None))
    check(first, "ESP_H2",
          Annotation(value="False", datasourceName="ESP", dataType="Flag",
                     description="HapMap2 membership", tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                     number=0))

    # Second SNP: 20:1230237 T>A
    second = annotate_snp("1230237", "T", "A")
    check(second, "ESP_NS",
          Annotation(value="3", datasourceName="ESP", dataType="Integer",
                     description="Number of Samples With Data",
                     tags=[TagConstants.INFO, TagConstants.NOT_SPLIT],
                     number=1))
    check(second, "ESP_AF",
          Annotation(value="", datasourceName="ESP", dataType="Float",
                     description="Allele Frequency", tags=[TagConstants.INFO, TagConstants.SPLIT],
                     number=-1))
示例5: testNoUnknownAnnotations
def testNoUnknownAnnotations(self):
    """Make sure that the gaf 3.0 datasource does not annotate anything with source set to Unknown."""
    creator = MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt')
    datasource = TestUtils.createTranscriptProviderDatasource(self.config)
    for mutation in creator.createMutations():
        annotated = datasource.annotate_mutation(mutation)
        MutUtils.validateMutation(annotated)
        unknown = MutUtils.getUnknownAnnotations(annotated)
        self.assertTrue(len(unknown) == 0, "Unknown annotations exist in mutation: " + str(unknown))
示例6: testCreateGPTsvDatasource
def testCreateGPTsvDatasource(self):
    """Install a ``gp_tsv`` datasource into a temp directory and verify the generated config and md5 file.

    The temporary directory is removed even when an assertion fails, so a
    failing run does not leave files behind.
    """
    datasourceFilename = "testdata/small_genome_position_tsv_ds/oreganno_trim.hg19.txt"
    datasourceType = "gp_tsv"
    datasourceName = "ORegAnno"
    datasourceFoldername = "ORegAnno"
    datasourceVersion = "UCSC Track"
    genomeBuild = "hg19"
    genomicPositionColumnNames = "hg19.oreganno.chrom,hg19.oreganno.chromStart,hg19.oreganno.chromEnd"

    tmpDir = tempfile.mkdtemp()
    try:
        destDir = os.path.join(tmpDir, datasourceFoldername, genomeBuild)
        os.makedirs(destDir)

        DatasourceInstallUtils.create_datasource(destDir, datasourceFilename, datasourceFoldername, datasourceName,
                                                 datasourceType, datasourceVersion, genomicPositionColumnNames)

        configFilename = os.path.join(destDir, "ORegAnno.config")
        configParser = ConfigUtils.createConfigParser(configFilename)
        self.assertTrue(configParser.has_section("general"), "general section is missing.")

        # (option in the "general" section, value this test expects).
        # src_file is expected to hold the base name only, not the input path.
        expectedOptions = [
            ("type", datasourceType),
            ("src_file", "oreganno_trim.hg19.txt"),
            ("title", datasourceName),
            ("version", datasourceVersion),
            ("genomic_position_cols", genomicPositionColumnNames),
        ]

        # Presence of every option is checked before any value is compared.
        for option, _ in expectedOptions:
            self.assertTrue(configParser.has_option("general", option),
                            "%s option is missing in general section." % option)
        for option, expected in expectedOptions:
            actual = configParser.get("general", option)
            self.assertEqual(actual, expected,
                             "Expected data source %s is %s but was %s." % (option, expected, actual))

        self.assertTrue(os.path.exists(os.path.join(tmpDir, datasourceFoldername, genomeBuild + ".md5")),
                        "No md5 file was generated.")
    finally:
        MutUtils.removeDir(tmpDir)
示例7: testChrGLs
def testChrGLs(self):
    """Test that mutations on unaligned transcripts can be annotated properly, i.e. when chromosome = GL.....

    Every mutation read from ``chrGLs.maf.tsv`` must annotate and validate
    without raising, and must end up with a non-empty ``gene`` value.
    """
    inputCreator = MafliteInputMutationCreator('testdata/maflite/chrGLs.maf.tsv', "configs/maflite_input.config")
    gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
    mutations = inputCreator.createMutations()
    for m in mutations:
        try:
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
        except Exception as e:
            # Any exception here is a test failure; report it together with its traceback.
            self.fail("Erroneous exception was thrown: " + str(e) + "\n" + traceback.format_exc())
        self.assertTrue(m['gene'] != '')
示例8: _handleMissingAnnotations
def _handleMissingAnnotations(self, m):
    """Check mutation ``m`` for the annotations used when rendering a TCGA VCF 1.1.

    Annotations listed in ``TcgaVcfOutputRenderer.requiredHeaderAnnotations``
    are mandatory: if any is missing, the problem is logged as an error and
    an exception is raised.  Annotations listed in
    ``TcgaVcfOutputRenderer.requiredMutAnnotations`` only produce a warning.

    :param m: mutation to inspect
    :raises MissingAnnotationException: when a required header annotation is missing
    """
    missingHeaderAnnotations = MutUtils.retrieveMissingAnnotations(
        m, TcgaVcfOutputRenderer.requiredHeaderAnnotations)
    missingMutAnnotations = MutUtils.retrieveMissingAnnotations(
        m, TcgaVcfOutputRenderer.requiredMutAnnotations)

    if len(missingHeaderAnnotations) > 0:
        sError = "The following annotations are required for rendering a TCGA VCF 1.1, but were not found: " + str(
            missingHeaderAnnotations)
        self.logger.error(sError)
        raise MissingAnnotationException(sError)

    if len(missingMutAnnotations) > 0:
        sError = "The following annotations important for rendering a TCGA VCF 1.1.  Proceeding... : " + str(
            missingMutAnnotations)
        # Logger.warn is a deprecated alias; warning() is the supported spelling.
        self.logger.warning(sError)
示例9: _determine_matching_alt_indices
def _determine_matching_alt_indices(self, mut, record, build):
    """Return the indices of the alternate alleles in ``record`` that match ``mut``.

    In ``"exact"`` match mode an alternate matches when chromosome, ref
    allele, alt allele, start and end all agree.  In any other mode an
    alternate matches when the chromosome agrees and the positions satisfy
    one of the span comparisons below.  A monomorphic record has no
    alternates; a match on it is reported as ``[-1]``.

    :param mut: mutation being annotated; ``chr``, ``start``, ``end``,
        ``ref_allele`` and ``alt_allele`` are read
    :param record: datasource record; ``CHROM``, ``POS``, ``REF``, ``ALT``
        and ``is_monomorphic`` are read (presumably a PyVCF record -- confirm)
    :param build: genome build handed to ``MutUtils.initializeMutFromAttributes``
    :return: list of matching indices into ``record.ALT``, ``[-1]`` for a
        matching monomorphic record, or an empty list when nothing matches
    """
    # These values do not depend on the alternate allele, so compute them once.
    chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
    pos = record.POS
    exact = self.match_mode == "exact"

    if record.is_monomorphic:
        if exact:
            matched = mut.chr == chrom and mut.ref_allele == record.REF
        else:
            matched = mut.chr == chrom and int(mut.start) <= pos and int(mut.end) >= pos
        return [-1] if matched else []

    indices = []
    ref = str(record.REF)
    for index, alt in enumerate(record.ALT):
        ds_mut = MutUtils.initializeMutFromAttributes(chrom, pos, pos, ref, str(alt), build)
        if mut.chr != ds_mut.chr:
            continue
        if exact and (mut.ref_allele != ds_mut.ref_allele or mut.alt_allele != ds_mut.alt_allele):
            continue

        mut_start, mut_end = int(mut.start), int(mut.end)
        ds_start, ds_end = int(ds_mut.start), int(ds_mut.end)
        same_span = mut_start == ds_start and mut_end == ds_end

        if exact:
            is_match = same_span
        else:
            is_match = (
                same_span
                # mut starts inside the datasource span and ends at or after its end
                or (mut_start >= ds_start and mut_end >= ds_end and mut_start <= ds_end)
                # mut covers the whole datasource span
                or (mut_start <= ds_start and mut_end >= ds_end)
                # mut starts at or before the datasource span and ends inside it
                or (mut_start <= ds_start and mut_end <= ds_end and mut_end >= ds_start)
            )
        if is_match:
            indices.append(index)

    return indices
示例10: testCreateIndexedVcfDatasource
def testCreateIndexedVcfDatasource(self):
    """Install an ``indexed_vcf`` datasource into a temp directory and verify config, md5 and tabix output.

    After the config checks, the generated ``example.tabix_indexed.vcf.gz``
    is opened and the records fetched for 20:1230237 are checked for their
    ``NS`` and ``DP`` INFO values.  The temporary directory is removed even
    when an assertion fails.
    """
    datasourceFilename = "testdata/vcf/example.vcf"
    datasourceFoldername = "1000Genomes"
    datasourceName = "1000Genomes"
    datasourceType = "indexed_vcf"
    datasourceVersion = "V4.1"
    genomeBuild = "hg19"

    tmpDir = tempfile.mkdtemp()
    try:
        destDir = os.path.join(tmpDir, datasourceFoldername, genomeBuild)
        os.makedirs(destDir)

        DatasourceInstallUtils.create_datasource(destDir, datasourceFilename, datasourceFoldername, datasourceName,
                                                 datasourceType, datasourceVersion)

        configFilename = os.path.join(destDir, "1000Genomes.config")
        configParser = ConfigUtils.createConfigParser(configFilename)
        self.assertTrue(configParser.has_section("general"), "general section is missing.")

        # (option in the "general" section, value this test expects).
        # src_file is expected to name the tabix-indexed output, not the input VCF.
        expectedOptions = [
            ("type", datasourceType),
            ("src_file", "example.tabix_indexed.vcf.gz"),
            ("title", datasourceName),
            ("version", datasourceVersion),
        ]

        # Presence of every option is checked before any value is compared.
        for option, _ in expectedOptions:
            self.assertTrue(configParser.has_option("general", option),
                            "%s option is missing in general section." % option)
        for option, expected in expectedOptions:
            actual = configParser.get("general", option)
            self.assertEqual(actual, expected,
                             "Expected data source %s is %s but was %s." % (option, expected, actual))

        self.assertTrue(os.path.exists(os.path.join(tmpDir, datasourceFoldername, genomeBuild + ".md5")),
                        "No md5 file was generated.")

        # Data source was created correctly
        tabixIndexedFilename = os.path.join(destDir, "example.tabix_indexed.vcf.gz")
        self.assertTrue(os.path.exists(tabixIndexedFilename), "No index file was generated.")

        vcfReader = vcf.Reader(filename=tabixIndexedFilename, compressed=True, strict_whitespace=True)
        vcfRecords = vcfReader.fetch(chrom=20, start=1230237, end=1230237)
        # NOTE(review): if fetch yields no records these checks are skipped silently -- confirm that is intended.
        for vcfRecord in vcfRecords:
            self.assertEqual(vcfRecord.INFO["NS"], 3, "Expected %s but got %s." % (3, vcfRecord.INFO["NS"]))
            self.assertEqual(vcfRecord.INFO["DP"], 13, "Expected %s but got %s." % (13, vcfRecord.INFO["DP"]))
    finally:
        MutUtils.removeDir(tmpDir)
示例11: testNoLostMutations
def testNoLostMutations(self):
    """Does a simple gaf datasource annotation run and makes sure that no mutations were lost.

    The expected count is the number of lines in the input file minus one
    (the first line is not counted as a mutation).
    """
    inputFilename = 'testdata/maflite/Patient0.snp.maf.txt'
    inputCreator = MafliteInputMutationCreator(inputFilename, "configs/maflite_input.config")
    gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

    # open() inside a with-block closes the handle deterministically.
    with open(inputFilename, 'r') as inputFile:
        numMutsInput = len(inputFile.readlines()) - 1

    mutations = inputCreator.createMutations()
    ctr = 0
    for m in mutations:
        m = gafDatasource.annotate_mutation(m)
        MutUtils.validateMutation(m)
        ctr += 1
    self.assertEqual(ctr, numMutsInput, "Gaf data source altered mutation count.")
示例12: testChrom2HashCodeTable
def testChrom2HashCodeTable(self):
    """Check the hash codes ``MutUtils.createChrom2HashCodeTable`` assigns for two chromosome lists."""
    # Each entry: (chromosome, expected hash code, failure-message template)
    mixed_expectations = (
        ("1", 1, "For chrom 1, hash code should be 1 but it was %s."),
        ("3", 3, "For chrom 3, hash code should be 3 but it was %s."),
        ("25", 25, "For chrom 25, hash code should be 25 but it was %s."),
        ("X", 26, "For chrom X, hash code should be 26 but it was %s."),
        ("Y", 27, "For chrom Y, hash code should be 27 but it was %s."),
        ("mt", 28, "For chrom mt, hash code should be 28 but it was %s."),
        ("contig1", 29, "For chrom contig1, hash code should be 29 but it was %s."),
    )
    table = MutUtils.createChrom2HashCodeTable(["1", "X", "3", "contig1", "Y", "25", "mt"])
    for chrom, code, template in mixed_expectations:
        self.assertTrue(table[chrom] == code, template % table[chrom])

    # A list without any numeric chromosome names.
    contig_expectations = (
        ("mt", 3, "For chrom mt, hash code should be 3 but it was %s."),
        ("contig1", 4, "For chrom contig1, hash code should be 4 but it was %s."),
    )
    table = MutUtils.createChrom2HashCodeTable(["contig1", "mt"])
    for chrom, code, template in contig_expectations:
        self.assertTrue(table[chrom] == code, template % table[chrom])
示例13: _is_matching
def _is_matching(self, mut, tsv_record):
    """Tell whether ``tsv_record`` matches mutation ``mut`` under ``self.match_mode``.

    When the match mode is not ``"exact"`` the result is whatever
    ``TranscriptProviderUtils.test_overlap`` returns for the two start/end
    pairs.  In ``"exact"`` mode chromosome, start and end must be equal, and
    ref/alt alleles must also be equal when the record has both columns.

    :param mut: mutation to test; ``chr``, ``start``, ``end`` and, when the
        record has ref/alt columns, ``ref_allele`` and ``alt_allele`` are read
    :param tsv_record: sequence indexed through ``self.tsv_index``
    :return: True on a match, False otherwise (or the overlap test result)
    """
    columns = self.tsv_index
    chrom = tsv_record[columns["chrom"]]
    record_start = tsv_record[columns["start"]]
    record_end = tsv_record[columns["end"]]
    build = "hg19"

    if self.match_mode != "exact":
        return TranscriptProviderUtils.test_overlap(int(mut.start), int(mut.end), int(record_start), int(record_end))

    if not ("ref" in columns and "alt" in columns):
        # No ref/alt columns: compare position only.
        if mut.chr == chrom and int(mut.start) == int(record_start) and int(mut.end) == int(record_end):
            return True
        return False

    ref = tsv_record[columns["ref"]]
    alt = tsv_record[columns["alt"]]
    if ref == "-" or alt == "-":
        # Mutation Annotation Format based tsv records.
        # TODO: This looks risky to be calling the MutationData constructor directly
        ds_mut = MutationData(chrom, record_start, record_end, ref, alt, build)
    else:
        # tsv records where the input isn't a Mutation Annotation Format file
        ds_mut = MutUtils.initializeMutFromAttributes(chrom, record_start, record_end, ref, alt, build)

    if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
            and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
            and int(mut.end) == int(ds_mut.end):
        return True
    return False
示例14: retrieve_cached_annotations
def retrieve_cached_annotations(self, m):
    """Look up the cached annotations for mutation ``m``.

    The cache key is built from the mutation and this object's db dir key.

    :param m: mutation
    :return: list of Annotations, or None, if cache miss.
    """
    db_dir_key = self.get_db_dir_key()
    variant_key = MutUtils.create_variant_key_by_mutation(m, db_dir_key)
    cache = self.get_cache()
    return cache.retrieve_from_cache(variant_key)
示例15: retrieveExons
def retrieveExons(self, gene, padding=10, isCodingOnly=False):
    """Return the padded genomic intervals recorded for the transcripts of ``gene``.

    :param gene: gene name; looked up in ``self.gene_id_idx`` and compared
        against each transcript's ``"gene"`` entry
    :param padding: amount subtracted from every interval start and added to
        every interval end
    :param isCodingOnly: when true, transcripts for which
        ``gaf_annotation.is_non_coding_transcript`` is true are skipped and
        the intervals come from ``self.getCodingTranscriptCoords``; otherwise
        each transcript's ``"genomic_coords"`` entry is used
    :return: set of ``(gene, chr, start, end)`` tuples, with start and end as
        strings; an empty set when the gene is not in ``self.gene_id_idx``
    """
    result = set()
    geneTuple = self.gene_id_idx.get(gene, None)
    if geneTuple is None:
        return result

    contig = MutUtils.convertChromosomeStringToMutationDataFormat(geneTuple[0])
    # Look the contig up once instead of on every iteration.
    bins = self.Transcripts.get(contig, [])
    for b in bins:
        for transcript in bins[b]:
            if transcript["gene"] != gene:
                continue
            if isCodingOnly:
                if gaf_annotation.is_non_coding_transcript(transcript, self):
                    continue
                genomic_coords = self.getCodingTranscriptCoords(transcript)
            else:
                genomic_coords = transcript["genomic_coords"]
            for coord in genomic_coords:
                # Coordinate pairs may come in either order; normalise before padding.
                start = min(coord[0], coord[1])
                end = max(coord[0], coord[1])
                result.add((gene, transcript["chr"], str(start - padding), str(end + padding)))
    return result