本文整理汇总了 Python 中 oncotator.Annotator.Annotator.annotate 方法的典型用法代码示例。如果您正苦于以下问题：Python Annotator.annotate 方法的具体用法？Python Annotator.annotate 怎么用？Python Annotator.annotate 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 oncotator.Annotator.Annotator 的用法示例。
在下文中一共展示了 Annotator.annotate 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_full_seg_file_annotations
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def test_full_seg_file_annotations(self):
    """Annotate a seg file in segment mode and inspect the SIMPLE_TSV output.

    The run reads ``testdata/seg/Patient0.seg.txt``, uses the datasource
    directory named by ``dbDir`` in the test configuration, and writes to
    ``out/test_full_seg_file_annotations.tsv``. The rendered file is then
    read back to check its header and each of its rows.
    """
    seg_path = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_full_seg_file_annotations.tsv"
    datasource_root = self.config.get('DEFAULT', "dbDir")

    # Remove any output file left behind by an earlier run.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    annotator = Annotator()
    annotator.initialize(
        RunSpecificationFactory.create_run_spec(
            "SEG_FILE", "SIMPLE_TSV", seg_path, output_filename,
            datasourceDir=datasource_root,
            annotating_type=RunSpecification.ANNOTATE_SEGMENTS))
    annotator.annotate()

    # Read the rendered file back: the header must carry the seg columns.
    tsv = GenericTsvReader(output_filename)
    header_names = tsv.getFieldNames()
    for column in ("Sample", "Num_Probes", "Segment_Mean"):
        self.assertTrue(column in header_names)

    for row in tsv:
        # Both coordinates must be present and non-blank.
        for position_key in ('start', 'end'):
            self.assertTrue(row[position_key] is not None)
            self.assertTrue(row[position_key].strip() != "")
        self.assertTrue("genes" in row.keys())
        # NOTE(review): str.split always returns at least one element, so
        # this only fails if the value has no split(); it does not prove
        # that any gene was actually reported.
        self.assertTrue(len(row["genes"].split(",")) > 0)
示例2: test_basic_rendering
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def test_basic_rendering(self):
    """Render a seg file as a GENE_LIST and check every output row.

    Input is ``testdata/seg/Patient0.seg.txt``; output goes to
    ``out/test_basic_rendering.gene_list.tsv`` and is read back afterwards.
    """
    seg_path = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_basic_rendering.gene_list.tsv"
    datasource_root = self.config.get('DEFAULT', "dbDir")

    # Remove any output file left behind by an earlier run.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    annotator = Annotator()
    annotator.initialize(
        RunSpecificationFactory.create_run_spec(
            "SEG_FILE", "GENE_LIST", seg_path, output_filename,
            datasourceDir=datasource_root,
            annotating_type=RunSpecification.ANNOTATE_SEGMENTS))
    annotator.annotate()

    gene_list = GenericTsvReader(output_filename)
    # The header is fetched as in the original, but never inspected.
    field_names = gene_list.getFieldNames()
    for row in gene_list:
        # Segment coordinates must be present and non-blank.
        for position_key in ('segment_start', 'segment_end'):
            self.assertTrue(row[position_key] is not None)
            self.assertTrue(row[position_key].strip() != "")
        self.assertTrue("gene" in row.keys())
        self.assertTrue(len(row["gene"]) > 0)
        # Must parse as a float and be non-zero to pass.
        self.assertTrue(float(row["segment_num_probes"]))
        self.assertTrue(row['sample'] == "Patient0")
示例3: testFullIndelVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testFullIndelVcf(self):
    """Run an indel maflite all the way through TCGA VCF creation.

    After annotation, the test checks that the output file exists and that
    every input mutation has a matching position in the VCF: deletions are
    expected one base before the maflite start (e.g. 1:36643701 in the
    maflite becomes 1:36643700 in the VCF); all other variants are expected
    at the same position.
    """
    outputFilename = "out/TCGAVCFTest.indel.vcf"
    maflite_path = "testdata/maflite/Patient0.indel.maf.txt"

    callStatsIn = MafliteInputMutationCreator(maflite_path)
    vcfOR = TcgaVcfOutputRenderer(outputFilename)
    datasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(callStatsIn)
    annotator.setOutputRenderer(vcfOR)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for ds in datasources:
        annotator.addDatasource(ds)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))

    # Re-read the input with a fresh creator to get the mutations to compare.
    maflite_ic = MafliteInputMutationCreator(maflite_path)
    muts = maflite_ic.createMutations()

    # Collect every VCF position up front. The context manager guarantees
    # the file handle is closed (the original left it open), and a set makes
    # the per-mutation membership checks below constant time.
    with open(outputFilename, 'r') as vcf_file:
        vcf_pos = set(int(rec.POS) for rec in vcf.Reader(vcf_file))

    for m in muts:
        # An empty, "-" or "." alt allele marks the variant as a deletion.
        is_variant_deletion = m.alt_allele in ("", "-", ".")
        if is_variant_deletion:
            self.assertTrue((int(m.start) - 1) in vcf_pos,
                            "Deletion was not correct for " + m.chr + ":" + m.start)
        else:
            self.assertTrue(int(m.start) in vcf_pos,
                            "Insertion was not correct for " + m.chr + ":" + m.start)
示例4: test_overwriting_muts
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def test_overwriting_muts(self):
    """Annotate with overwriting enabled from a datasource that rewrites "Who".

    The maflite input already carries a "Who" annotation that the gene
    datasource will also try to write. With ALLOW_ANNOTATION_OVERWRITING
    set, the run is expected to complete, and no output row may have
    "Tromokratis" in its ``TJ_Data_Who`` column.
    """
    gene_ds = DatasourceFactory.createDatasource(
        "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
        "testdata/thaga_janakari_gene_ds/hg19/",
    )
    output_filename = "out/who_alt1_vs_alt2.maf.annotated"
    overwrite_opts = {
        OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True,
        OptionConstants.NO_PREPEND: True,
    }
    run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
        "MAFLITE",
        "TCGAMAF",
        "testdata/maflite/who_alt1_vs_alt2.maflite",
        output_filename,
        datasource_list=[gene_ds],
        other_opts=overwrite_opts,
    )

    annotator = Annotator()
    annotator.initialize(run_spec)
    annotator.annotate()

    # A missing column is treated as an empty value, which also passes.
    for row in GenericTsvReader(output_filename):
        self.assertTrue(row.get("TJ_Data_Who", "") != "Tromokratis")
示例5: testDuplicateAnnotation
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testDuplicateAnnotation(self):
    """Check that duplicate annotations in a VCF come through as two columns.

    The header and the first row of the rendered output must both contain
    ``variant_status`` and ``sample_variant_status``, with the first row
    holding "2" and "0" respectively.
    """
    vcf_path = os.path.join("testdata", "vcf", "example.duplicate_annotation.vcf")
    tsv_path = os.path.join("out", "example.duplicate_annotation.out.tsv")

    creator = VcfInputMutationCreator(vcf_path)
    # Return value is discarded here, exactly as in the original test.
    creator.createMutations()
    renderer = SimpleOutputRenderer(tsv_path)

    annotator = Annotator()
    annotator.setInputCreator(creator)
    annotator.setOutputRenderer(renderer)
    annotator.annotate()

    reader = GenericTsvReader(tsv_path)

    # (field, failure message) pairs for the header check.
    header_names = reader.getFieldNames()
    for field, message in (
            ("variant_status", "variant_status field is missing in the header."),
            ("sample_variant_status", "sample_variant_status is missing in the header.")):
        self.assertTrue(field in header_names, message)

    # Only the first data row is inspected.
    first_row = reader.next()
    for field, message in (
            ("variant_status", "variant_status field is missing in the row."),
            ("sample_variant_status", "sample_variant_status is missing in the row.")):
        self.assertTrue(field in first_row, message)

    for expected, field, message in (
            ("2", "variant_status", "Incorrect value of variant_status."),
            ("0", "sample_variant_status", "Incorrect value of sample_variant_status")):
        self.assertEqual(expected, first_row[field], message)
示例6: testTCGAMAFAsInputAndQuickAnnotate
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """Feed an annotated TCGA MAF in as MAFLITE, annotate, and render a MAF.

    Checks that the output is non-empty, that its comments mention
    ``#version``, that the two ``i_TJ_Data_*`` columns were added, and that
    the output has exactly two more lines (rows plus comment lines) than
    the input.
    """
    inputFilename = "testdata/maf/Patient0.maf.annotated"
    input_creator = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
    outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
    maf_renderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(maf_renderer)
    annotator.addDatasource(
        DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/"))
    annotator.annotate()

    output_size = os.stat(outputFilename).st_size
    self.assertTrue(output_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")

    input_reader = GenericTsvReader(inputFilename)
    output_reader = GenericTsvReader(outputFilename)

    self.assertTrue(output_reader.getComments().find('#version') != -1,
                    "First line did not specify a version number")
    for new_field in ("i_TJ_Data_Why", "i_TJ_Data_Who"):
        self.assertTrue(new_field in output_reader.getFieldNames(),
                        "New field missing (" + new_field + ") from header")

    # Count data rows first (output, then input), then add comment lines.
    output_rows = sum(1 for _ in output_reader)
    input_rows = sum(1 for _ in input_reader)
    ctrIn = input_rows + len(input_reader.getCommentsAsList())
    ctrOut = output_rows + len(output_reader.getCommentsAsList())

    self.assertTrue(
        ctrOut == (ctrIn + 2),
        "Output file should have same number of lines plus two "
        "(for maf version and Oncotator version comments) as input file. "
        "(In,Out): " + str(ctrIn) + ", " + str(ctrOut))
示例7: testSNPsAndIndelStartAndEndPos
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testSNPsAndIndelStartAndEndPos(self):
    """Check start/end positions of SNPs and indels parsed from a VCF.

    Positions are expected to follow the NCI MAF specification
    (https://wiki.nci.nih.gov/display/TCGA/Mutation+Annotation+Format+(MAF)+Specification).
    Only rows whose start matches one of three known positions are checked;
    all other rows are ignored.
    """
    vcf_path = os.path.join("testdata", "vcf", "example.snps.indels.vcf")
    tsv_path = os.path.join("out", "example.snps.indels.out.tsv")

    creator = VcfInputMutationCreator(vcf_path)
    # Return value is discarded here, exactly as in the original test.
    creator.createMutations()
    renderer = SimpleOutputRenderer(tsv_path)

    annotator = Annotator()
    annotator.setInputCreator(creator)
    annotator.setOutputRenderer(renderer)
    annotator.annotate()

    # Known start position -> end position the output must report.
    expected_end_by_start = {
        "16890445": "16890445",
        "154524458": "154524459",
        "114189432": "114189433",
    }
    for row in GenericTsvReader(tsv_path):
        expected_end = expected_end_by_start.get(row['start'])
        if expected_end is None:
            continue
        self.assertEqual(
            row["end"], expected_end,
            "The value should be %s but it was %s." % (expected_end, row["end"]))
示例8: test_rendering_combined_to_tsv
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def test_rendering_combined_to_tsv(self):
    """Annotate a maflite with ONP inference on and render a simple TSV.

    There are no assertions: the test passes if the run does not raise.
    """
    maflite_path = os.path.join("testdata", "maflite", "onp_combination.maf.txt")
    tsv_path = os.path.join("out", "onp_combination.tsv")
    onp_opts = {OptionConstants.INFER_ONPS: True}

    run_spec = RunSpecificationFactory.create_run_spec(
        "MAFLITE", "SIMPLE_TSV", maflite_path, tsv_path, other_opts=onp_opts)

    annotator = Annotator()
    annotator.initialize(run_spec)
    annotator.annotate()
示例9: testNumberGRenderingOfRandomVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testNumberGRenderingOfRandomVcf(self):
    """Annotate ``number_g.random.vcf`` and render it with the simple renderer.

    There are no assertions: the test passes if the run does not raise.
    """
    vcf_path = os.path.join("testdata", "vcf", "number_g.random.vcf")
    tsv_path = os.path.join("out", "number_g.random.out.tsv")

    input_creator = VcfInputMutationCreator(vcf_path)
    # Return value is discarded here, exactly as in the original test.
    input_creator.createMutations()
    output_renderer = SimpleOutputRenderer(tsv_path)

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(output_renderer)
    annotator.annotate()
示例10: test_single_sample_onp_combiner
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def test_single_sample_onp_combiner(self):
    """Create an ONP-combined TCGA MAF from a single-sample maflite.

    There are no assertions: the test passes if the run does not raise.
    """
    maflite_path = 'testdata/maflite/onp.singlesample.maf.txt'
    maf_path = 'out/testSingleSampleOnpCombiner.maf'

    # The datasource directory comes from the unit-test configuration.
    datasource_root = TestUtils.createUnitTestConfig().get('DEFAULT', "dbDir")

    run_spec = RunSpecificationFactory.create_run_spec(
        "MAFLITE", "TCGAMAF", maflite_path, maf_path,
        datasource_dir=datasource_root,
        other_opts={OptionConstants.INFER_ONPS: True})

    annotator = Annotator()
    annotator.initialize(run_spec)
    annotator.annotate()
示例11: testAnnotationWithNoSampleNameExampleVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testAnnotationWithNoSampleNameExampleVcf(self):
    """Annotate a VCF that has no samples and render it with the simple renderer.

    There are no assertions: the test passes if the run does not raise.
    """
    vcf_path = os.path.join("testdata", "vcf", "example.sampleName.removed.vcf")
    tsv_path = os.path.join("out", "example.sampleName.removed.out.tsv")

    input_creator = VcfInputMutationCreator(vcf_path)
    output_renderer = SimpleOutputRenderer(tsv_path)

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(output_renderer)
    annotator.annotate()
示例12: testAnnotationWithMafliteWithTrailingSpaces
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testAnnotationWithMafliteWithTrailingSpaces(self):
    """Annotate a maflite whose ref/alt alleles contain trailing spaces.

    There are no assertions: the test passes if the run does not raise.
    """
    datasource_root = self.config.get('DEFAULT', "dbDir")
    maflite_path = os.path.join(
        "testdata", "maflite", "example.trailing_whitespace_in_alleles.maflite")
    maf_path = os.path.join("out", "example.trailing_whitespace_in_alleles.maf.txt")

    annotator = Annotator()
    annotator.initialize(
        RunSpecificationFactory.create_run_spec(
            "MAFLITE", "TCGAMAF", maflite_path, maf_path,
            datasource_dir=datasource_root,
            annotating_type=RunSpecification.ANNOTATE_MUTATIONS))
    annotator.annotate()
示例13: _annotate_m2_vcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def _annotate_m2_vcf(self, input_vcf_file, output_tcgamaf_file):
    """Convert ``input_vcf_file`` to a TCGA MAF at ``output_tcgamaf_file``.

    The tumor and normal barcodes are supplied by hand as global
    annotations, since this conversion requires them to be specified
    manually.
    """
    barcode_overrides = {
        'tumor_barcode': 'Patient0-Tumor',
        'normal_barcode': 'Patient0-Normal',
    }
    render_opts = {
        OptionConstants.COLLAPSE_FILTER_COLS: True,
        OptionConstants.NO_PREPEND: True,
        OptionConstants.SPLIT_ALLELIC_DEPTH: False,
        OptionConstants.INFER_ONPS: True,
    }

    annotator = Annotator()
    # "." is passed as the datasource dir; the original author notes this is
    # meant to be an empty datasource dir, used to speed the run up.
    annotator.initialize(
        RunSpecificationFactory.create_run_spec(
            "VCF", "TCGAMAF", input_vcf_file, output_tcgamaf_file,
            datasource_dir=".",
            global_annotations=barcode_overrides,
            is_skip_no_alts=True,
            other_opts=render_opts))
    annotator.annotate()
示例14: testAnnotationWithDuplicateValuesInVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testAnnotationWithDuplicateValuesInVcf(self):
    """Annotate a VCF in which several fields share the same name.

    The input is ``example.duplicate_fields.vcf``. There are no assertions:
    the test passes if the run does not raise.
    """
    vcf_path = os.path.join("testdata", "vcf", "example.duplicate_fields.vcf")
    tsv_path = os.path.join("out", "example.duplicate_fields2.tsv")

    input_creator = VcfInputMutationCreator(vcf_path)
    # Return value is discarded here, exactly as in the original test.
    input_creator.createMutations()
    output_renderer = SimpleOutputRenderer(tsv_path, [])

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(output_renderer)
    annotator.annotate()
示例15: testSimpleAnnotationWithAComplexVcf
# 需要导入模块: from oncotator.Annotator import Annotator [as 别名]
# 或者: from oncotator.Annotator.Annotator import annotate [as 别名]
def testSimpleAnnotationWithAComplexVcf(self):
    """Annotate a fairly complex VCF (``random.vcf``) without errors.

    There are no assertions: the test passes if the run does not raise.
    """
    vcf_path = os.path.join("testdata", "vcf", "random.vcf")
    tsv_path = os.path.join("out", "random.tsv")

    input_creator = VcfInputMutationCreator(vcf_path)
    # Return value is discarded here, exactly as in the original test.
    input_creator.createMutations()
    output_renderer = SimpleOutputRenderer(tsv_path, [])

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(output_renderer)
    annotator.annotate()