本文整理匯總了Python中oncotator.utils.GenericTsvReader.GenericTsvReader.getFieldNames方法的典型用法代碼示例。如果您正苦於以下問題:Python GenericTsvReader.getFieldNames方法的具體用法?Python GenericTsvReader.getFieldNames怎麼用?Python GenericTsvReader.getFieldNames使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類oncotator.utils.GenericTsvReader.GenericTsvReader的用法示例。
在下文中一共展示了GenericTsvReader.getFieldNames方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: sortFile
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def sortFile(self, filename, func, length=50000):
    """
    Sort the input file (self.readfilename) and write the sorted result to filename.

    The comments and the header line of the input are written first, followed by
    the merged partitions.

    :param filename: sorted filename
    :param func: function that converts each row of the input file to an unique, sortable key
    :param length: maximum number of lines in a partition
    """
    reader = GenericTsvReader(filename=self.readfilename, commentPrepend=self.commentPrepend,
                              delimiter=self.delimiter)
    comments = reader.getComments()

    # A missing header is treated as an empty one, which yields an empty
    # index map and an empty header line in the output.
    fieldnames = reader.getFieldNames()
    if fieldnames is None:
        fieldnames = []
    fieldnameIndexes = collections.OrderedDict((name, idx) for (idx, name) in enumerate(fieldnames))

    iterable = iter(reader.getInputContentFP())
    partitions = self._yieldPartitions(iterable, func, fieldnameIndexes, length)

    # Positional arguments and str.join behave the same as the former
    # open(name=...) / string.join spellings, without relying on Python-2-only names.
    with open(filename, 'wb', 64 * 1024) as writer:
        writer.write(comments)
        writer.write(self.delimiter.join(fieldnames) + "\n")
        writer.writelines(self._merge(partitions))  # generators are allowed as inputs to writelines function
示例2: testDuplicateAnnotation
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testDuplicateAnnotation(self):
    """
    Annotate the duplicate-annotation example VCF and check that both
    variant_status columns are rendered in the header and in the first row.
    """
    vcf_path = os.path.join("testdata", "vcf", "example.duplicate_annotation.vcf")
    tsv_path = os.path.join("out", "example.duplicate_annotation.out.tsv")

    creator = VcfInputMutationCreator(vcf_path)
    creator.createMutations()
    renderer = SimpleOutputRenderer(tsv_path)

    annotator = Annotator()
    annotator.setInputCreator(creator)
    annotator.setOutputRenderer(renderer)
    annotator.annotate()

    tsvReader = GenericTsvReader(tsv_path)

    # Header checks
    header = tsvReader.getFieldNames()
    self.assertTrue("variant_status" in header, "variant_status field is missing in the header.")
    self.assertTrue("sample_variant_status" in header, "sample_variant_status is missing in the header.")

    # Only the first data row is inspected
    first_row = tsvReader.next()
    self.assertTrue("variant_status" in first_row, "variant_status field is missing in the row.")
    self.assertTrue("sample_variant_status" in first_row, "sample_variant_status is missing in the row.")
    self.assertEqual("2", first_row["variant_status"], "Incorrect value of variant_status.")
    self.assertEqual("0", first_row["sample_variant_status"], "Incorrect value of sample_variant_status")
示例3: _create_test_ds
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def _create_test_ds(self, input_tsv, dir_name, index_cols):
    """
    Build a "snp_leveldb" test datasource from a tsv file and load it.

    The datasource directory "<dir_name>/test_snp_leveldb" is removed if it
    already exists and is then recreated.

    :param input_tsv: tsv file used as the datasource input (read with "%" as the comment prefix)
    :param dir_name: parent directory of the datasource directory
    :param index_cols: list of index column names; every other tsv column becomes an annotation column
    :return: the datasource that DatasourceFactory creates from the generated config file
    """
    base_name = "test_snp_leveldb"
    full_name = dir_name + "/" + base_name
    config_filename = full_name + "/" + base_name + ".config"

    # Always start from an empty datasource directory
    if os.path.exists(full_name):
        shutil.rmtree(full_name)
    os.makedirs(full_name)

    tsv_reader = GenericTsvReader(input_tsv, commentPrepend="%")
    annotation_cols = copy.copy(tsv_reader.getFieldNames())
    for icol in index_cols:
        if icol in annotation_cols:
            annotation_cols.remove(icol)

    ds_creator = SnpOnlyLevelDbDatasourceCreator()
    ds_creator.createDatasource(full_name, input_tsv, ",".join(index_cols), config_filename, "snp_leveldb",
                                base_name, "TEST", "exact", annotation_cols, [])

    # Load the config that was just written. The previous hard-coded path
    # only matched one particular dir_name and ignored the argument otherwise.
    ds = DatasourceFactory.createDatasource(os.path.abspath(config_filename), os.path.dirname(config_filename))
    return ds
示例4: test_full_seg_file_annotations
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def test_full_seg_file_annotations(self):
    """Read a seg file, run the annotator against the configured dbDir, render as SIMPLE_TSV, and check the output."""
    seg_path = "testdata/seg/Patient0.seg.txt"
    tsv_path = "out/test_full_seg_file_annotations.tsv"
    db_dir = self.config.get('DEFAULT',"dbDir")

    # Remove any output left over from an earlier run
    if os.path.exists(tsv_path):
        os.remove(tsv_path)

    annotator = Annotator()
    run_spec = RunSpecificationFactory.create_run_spec(
        "SEG_FILE", "SIMPLE_TSV", seg_path, tsv_path,
        datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
    annotator.initialize(run_spec)
    annotator.annotate()

    # Now check the output
    output_reader = GenericTsvReader(tsv_path)
    headers = output_reader.getFieldNames()
    for rcol in ["Sample", "Num_Probes", "Segment_Mean"]:
        self.assertTrue(rcol in headers)

    for row in output_reader:
        # Both coordinates must be present and non-blank
        for coord in ('start', 'end'):
            self.assertTrue(row[coord] is not None)
            self.assertTrue(row[coord].strip() != "")
        self.assertTrue("genes" in row.keys())
        self.assertTrue(len(row["genes"].split(",")) > 0)
示例5: test_basic_rendering
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def test_basic_rendering(self):
    """Annotate a seg file, render it as a GENE_LIST, and sanity-check every output row."""
    seg_path = "testdata/seg/Patient0.seg.txt"
    gene_list_path = "out/test_basic_rendering.gene_list.tsv"
    db_dir = self.config.get('DEFAULT',"dbDir")

    # Remove any output left over from an earlier run
    if os.path.exists(gene_list_path):
        os.remove(gene_list_path)

    annotator = Annotator()
    run_spec = RunSpecificationFactory.create_run_spec(
        "SEG_FILE", "GENE_LIST", seg_path, gene_list_path,
        datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
    annotator.initialize(run_spec)
    annotator.annotate()

    # Now check the output
    output_reader = GenericTsvReader(gene_list_path)
    headers = output_reader.getFieldNames()  # result is not inspected by this test
    for row in output_reader:
        # Both segment coordinates must be present and non-blank
        for coord in ('segment_start', 'segment_end'):
            self.assertTrue(row[coord] is not None)
            self.assertTrue(row[coord].strip() != "")
        self.assertTrue("gene" in row.keys())
        self.assertTrue(len(row["gene"]) > 0)
        self.assertTrue(float(row["segment_num_probes"]))
        self.assertTrue(row['sample'] == "Patient0")
示例6: test_simple_seg_file_input
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def test_simple_seg_file_input(self):
    """Test that we can read in a seg file, do no annotation, and output as SIMPLE_TSV"""
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_simple_seg_file_input.tsv"

    # Remove any output left over from an earlier run
    if os.path.exists(output_filename):
        os.remove(output_filename)

    # Count the segments directly. The former "i = 1" + enumerate idiom
    # reported 2 segments for an empty input.
    ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
    num_segs = sum(1 for _ in ic.createMutations())
    self.assertTrue(num_segs == 27, "Found %d segments when there should have been 27." % num_segs)

    # A fresh creator is built for rendering; presumably the iterable
    # returned by createMutations() can only be consumed once.
    ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
    segs = ic.createMutations()
    outputRenderer = SimpleOutputRenderer(output_filename, '')
    outputRenderer.renderMutations(segs)

    # Now check the output
    output_reader = GenericTsvReader(output_filename)
    required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
    headers = output_reader.getFieldNames()
    for rcol in required_cols:
        self.assertTrue(rcol in headers)

    for line_dict in output_reader:
        self.assertTrue(line_dict['start'] is not None)
        self.assertTrue(line_dict['start'].strip() != "")
        self.assertTrue(line_dict['end'] is not None)
        self.assertTrue(line_dict['end'].strip() != "")
示例7: testTCGAMAFAsInputAndQuickAnnotate
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """ Feed a TCGA MAF through the MAFLITE input creator, annotate it with one extra datasource, and check the rendered TCGA MAF. """
    inputFilename = "testdata/maf/Patient0.maf.annotated"
    input_creator = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
    outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
    renderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(renderer)
    ds = DatasourceFactory.createDatasource("testdata/thaga_janakari_gene_ds/hg19/tj_data.config", "testdata/thaga_janakari_gene_ds/hg19/")
    annotator.addDatasource(ds)
    annotator.annotate()

    # The rendered file must not be empty
    statinfo = os.stat(outputFilename)
    self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")

    tsvReaderIn = GenericTsvReader(inputFilename)
    tsvReader = GenericTsvReader(outputFilename)

    self.assertTrue(tsvReader.getComments().find('#version') != -1, "First line did not specify a version number")
    self.assertTrue("i_TJ_Data_Why" in tsvReader.getFieldNames(), "New field missing (i_TJ_Data_Why) from header")
    self.assertTrue("i_TJ_Data_Who" in tsvReader.getFieldNames(), "New field missing (i_TJ_Data_Who) from header")

    # Line totals = data rows + comment lines, for output and input alike
    ctrOut = sum(1 for _ in tsvReader)
    ctrIn = sum(1 for _ in tsvReaderIn)
    ctrIn += len(tsvReaderIn.getCommentsAsList())
    ctrOut += len(tsvReader.getCommentsAsList())

    self.assertTrue(ctrOut == (ctrIn + 2), "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file. (In,Out): " + str(ctrIn) + ", " + str(ctrOut))
示例8: createDatasource
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def createDatasource(self, destDir, ds_file, index_column_names, configFilename, ds_type, ds_name, ds_version,
                     ds_match_mode, annotation_column_names, indexCols):
    """
    Create a SNP LevelDB ("<destDir>/<ds_name>.leveldb") plus its config file from a tsv file.

    :param destDir: directory in which the leveldb is created
    :param ds_file: tsv file whose rows are stored in the leveldb
    :param index_column_names: comma-separated column names; the first five are read, in order, as
        chrom, start, end, ref, and alt
    :param configFilename: forwarded to self._createConfigFile
    :param ds_type: not referenced in this method
    :param ds_name: datasource name; also the base name of the leveldb
    :param ds_version: forwarded to self._createConfigFile
    :param ds_match_mode: not referenced in this method
    :param annotation_column_names: If None, use all columns in the tsv (minus the index columns)
    :param indexCols: list of the index columns. Assumed to be five corresponding to chrom, start, end, ref, and alt.
        Not referenced in this method.
    """
    logger = logging.getLogger(__name__)

    index_column_names = index_column_names.split(",")
    output_filename = destDir + "/" + ds_name + ".leveldb"
    src_file = os.path.basename(output_filename)
    db = leveldb.LevelDB(output_filename, create_if_missing=True)

    # If an index column name itself starts with "#", "%" is used as the comment prefix instead.
    has_hash_column = any(icol.startswith("#") for icol in index_column_names)
    comment_prepend = "%" if has_hash_column else "#"

    tsv_reader = GenericTsvReader(ds_file, commentPrepend=comment_prepend)

    if annotation_column_names is None:
        annotation_column_names = copy.copy(tsv_reader.getFieldNames())
        for icol in index_column_names:
            if icol in annotation_column_names:
                annotation_column_names.remove(icol)

    logger.info("Creating SNP LevelDB for the following index headers: " + str(index_column_names))
    logger.info("Creating SNP LevelDB for the following data headers: " + str(annotation_column_names))

    # Create the config file
    self._createConfigFile(configFilename, src_file, ds_name, ds_version, index_column_names,
                           annotation_columns=annotation_column_names)

    # NOTE(review): entries are written with db.Put, so this batch stays empty;
    # the final db.Write(batch, sync=True) may be intended only as a sync -- confirm.
    batch = leveldb.WriteBatch()
    for i, line_dict in enumerate(tsv_reader):
        # chrom, start, end, ref, alt -- in index column order
        key_parts = [line_dict[index_column_names[pos]] for pos in range(5)]
        h = SnpOnlyLevelDbDatasource.generate_hash(*key_parts)

        if i % 5000 == 0:
            logger.info("Rendering %d entries" % (i))

        values = [line_dict.get(col, "") for col in annotation_column_names]
        db.Put(h, ",".join(values))
    db.Write(batch, sync = True)
示例9: testExposedColumns
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testExposedColumns(self):
    """Check that columns listed in the config file as exposed are rendered without the i_ prepend"""
    db_dir = self._determine_db_dir()
    maf_path = self._annotateTest('testdata/maflite/tiny_maflite.maf.txt', "out/testExposedCols.maf.tsv", db_dir)

    # Sanity checks to make sure that the generated maf file is not junk.
    self._validateTcgaMafContents(maf_path)

    # Check the columns, since the input has a couple of exposed columns.
    headers = GenericTsvReader(maf_path).getFieldNames()
    for column in ('t_alt_count', 't_ref_count'):
        self.assertFalse(("i_" + column) in headers, "i_ was prepended to " + column)
        self.assertTrue(column in headers, column + " not found.")
示例10: testBasicAnnotation
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testBasicAnnotation(self):
    """ Annotate with a generic transcript-based TSV datasource and confirm only that the expected headers are in the output. """
    # We need a gaf data source to annotate gene
    gaf_ds = TestUtils.createTranscriptProviderDatasource(config=self.config)
    transcript_ds = DatasourceFactory.createDatasource(
        "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config",
        "testdata/small_transcript_tsv_ds/",
    )

    annotator = Annotator()
    annotator.setInputCreator(MafliteInputMutationCreator("testdata/maflite/Patient0.snp.maf.txt"))
    annotator.setOutputRenderer(SimpleOutputRenderer("out/genericTranscriptTest.out.tsv"))
    annotator.addDatasource(gaf_ds)
    annotator.addDatasource(transcript_ds)

    # The file to read back is whatever annotate() returns
    rendered_path = annotator.annotate()

    headers = GenericTsvReader(rendered_path).getFieldNames()
    for expected in ("refseq_test_mRNA_Id", "refseq_test_prot_Id"):
        self.assertTrue(expected in headers, expected + " not found in headers: " + str(headers))
示例11: testBasicAnnotation
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testBasicAnnotation(self):
    ''' Annotate from a basic tsv gene file. Use the Gaf to annotate before trying the tsv -- required since the gene annotation must be populated.
    Using trimmed CancerGeneCensus as basis for this test.
    '''
    # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
    genesAvailable = ['ABL1','ABL2','ACSL3','AF15Q14','AF1Q','AF3p21','AF5q31','AKAP9','AKT1','AKT2','ALDH2','ALK','ALO17','APC','ARHGEF12','ARHH','ARID1A','ARID2','ARNT','ASPSCR1','ASXL1','ATF1','ATIC','ATM','ATRX','BAP1','BCL10','BCL11A','BCL11B']

    # We need a gaf data source to annotate gene
    gafDatasource = TestUtils.createTranscriptProviderDatasource(config=self.config)
    geneDS = DatasourceFactory.createDatasource("testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/")
    outputFilename = 'out/genericGeneTest.out.tsv'

    annotator = Annotator()
    annotator.setInputCreator(MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt'))
    annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
    annotator.addDatasource(gafDatasource)
    annotator.addDatasource(geneDS)
    annotator.annotate()

    # Check that there were actual annotations performed.
    tsvReader = GenericTsvReader(outputFilename)
    fields = tsvReader.getFieldNames()
    # The failure message now names the column that is actually checked.
    self.assertTrue('CGC_Abridged_Other Syndrome/Disease' in fields, "'CGC_Abridged_Other Syndrome/Disease' was not present in the header")
    self.assertTrue('CGC_Abridged_Mutation Type' in fields, "'CGC_Abridged_Mutation Type' was not present in the header")

    ctr = 1  # 1-based row number, used only in the failure message
    linesThatShouldBeAnnotated = 0
    for lineDict in tsvReader:
        self.assertTrue('gene' in lineDict.keys())
        if lineDict['gene'] in genesAvailable:
            # "!=" replaces the Python-2-only "<>" operator; same comparison.
            self.assertTrue(lineDict['CGC_Abridged_GeneID'] != '', "'CGC_Abridged_GeneID' was missing on a row that should have been populated. Line: " + str(ctr))
            linesThatShouldBeAnnotated += 1
        ctr += 1

    # Guard against a vacuous pass: at least one row must have been eligible.
    self.assertTrue((linesThatShouldBeAnnotated) > 0, "Bad data -- cannot test missed detects.")
示例12: testInternalFieldsSkipPrepend
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testInternalFieldsSkipPrepend(self):
    """ Render with the NO_PREPEND option set and check that an unlisted annotation keeps its name (no "i_" prefix)."""
    maf_config = 'configs/tcgaMAF2.4_output.config'
    outputFilename = "out/testInternalFields_v2.4.maf.tsv"

    m = MutationDataFactory.default_create()
    m.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
    # The next annotation is real and should not be considered internal.
    m.createAnnotation("gene", "EGFR")

    renderer = TcgaMafOutputRenderer(outputFilename, configFile=maf_config,
                                     other_options={OptionConstants.NO_PREPEND:True})
    renderer.renderMutations(iter([m]), ['No comments'])

    # Precondition of this test: Hugo_Symbol is configured as a required column
    requiredColumns = ConfigUtils.createConfigParser(maf_config).get("general", "requiredColumns")
    self.assertTrue("Hugo_Symbol" in requiredColumns, " This test assumes that Hugo_Symbol is a required column in the TCGA MAF. If not, the test must be modified.")

    statinfo = os.stat(outputFilename)
    self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")

    headers = GenericTsvReader(outputFilename).getFieldNames()
    self.assertTrue("Hugo_Symbol" in headers, "Hugo_Symbol not found in output headers")
    self.assertTrue("i_TEST" not in headers, "i_TEST was found in output headers when prepend was disabled.")
    self.assertTrue("TEST" in headers, "TEST was not found in output headers.")
示例13: testInternalFields
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testInternalFields(self):
    """ Check that an annotation not listed explicitly in the required or optional columns is rendered with i_ prepended """
    maf_config = 'configs/tcgaMAF2.4_output.config'
    outputFilename = "out/testInternalFields_v2.4.maf.tsv"

    m = MutationData()
    m.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
    # The next annotation is real and should not be considered internal.
    m.createAnnotation("gene", "EGFR")

    renderer = TcgaMafOutputRenderer(outputFilename, configFile=maf_config)
    renderer.renderMutations(iter([m]), ['No comments'])

    # Precondition of this test: Hugo_Symbol is configured as a required column
    requiredColumns = ConfigUtils.createConfigParser(maf_config).get("general", "requiredColumns")
    self.assertTrue("Hugo_Symbol" in requiredColumns, " This test assumes that Hugo_Symbol is a required column in the TCGA MAF. If not, the test must be modified.")

    statinfo = os.stat(outputFilename)
    self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + outputFilename + ") is empty.")

    headers = GenericTsvReader(outputFilename).getFieldNames()
    self.assertTrue("Hugo_Symbol" in headers, "Hugo_Symbol not found in output headers")
    self.assertTrue("TEST" not in headers, "TEST was found in output headers when it should have been renamed to i_TEST")
    self.assertTrue("i_TEST" in headers, "i_TEST not found in output headers")
示例14: test_rendering_with_exons
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def test_rendering_with_exons(self):
    """Render a seg file as a GENE_LIST and check the exon reported for a segment ending in MAPK1"""
    seg_path = "testdata/seg/Middle_of_exon.seg.txt"
    gene_list_path = "out/test_exon_seg2.gene_list.tsv"
    db_dir = self.config.get('DEFAULT',"dbDir")

    # Remove any output left over from an earlier run
    if os.path.exists(gene_list_path):
        os.remove(gene_list_path)

    annotator = Annotator()
    run_spec = RunSpecificationFactory.create_run_spec(
        "SEG_FILE", "GENE_LIST", seg_path, gene_list_path,
        datasourceDir=db_dir, annotating_type=RunSpecification.ANNOTATE_SEGMENTS)
    annotator.initialize(run_spec)
    annotator.annotate()

    # Now check the output
    output_reader = GenericTsvReader(gene_list_path)
    headers = output_reader.getFieldNames()  # result is not inspected by this test
    for row in output_reader:
        self.assertTrue(row['segment_start'] is not None)
        self.assertTrue(row['segment_start'].strip() != "")
        if row['segment_end_gene'] == "MAPK1":
            end_exon = row['segment_end_exon'].strip()
            self.assertTrue(end_exon == "8+", "Should have been 8+, but saw: %s" % end_exon)
示例15: testBasicAnnotation
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getFieldNames [as 別名]
def testBasicAnnotation(self):
    """ Annotate from a basic tsv gene file. Use the Gaf to annotate before trying the tsv -- required since the gene annotation must be populated.
    Using trimmed CancerGeneCensus as basis for this test.
    """
    # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
    genesAvailable = [
        "ABL1",
        "ABL2",
        "ACSL3",
        "AF15Q14",
        "AF1Q",
        "AF3p21",
        "AF5q31",
        "AKAP9",
        "AKT1",
        "AKT2",
        "ALDH2",
        "ALK",
        "ALO17",
        "APC",
        "ARHGEF12",
        "ARHH",
        "ARID1A",
        "ARID2",
        "ARNT",
        "ASPSCR1",
        "ASXL1",
        "ATF1",
        "ATIC",
        "ATM",
        "ATRX",
        "BAP1",
        "BCL10",
        "BCL11A",
        "BCL11B",
    ]

    # We need a gaf data source to annotate gene
    gafDatasource = TestUtils.createTranscriptProviderDatasource(config=self.config)
    geneDS = DatasourceFactory.createDatasource(
        "testdata/small_tsv_ds/small_tsv_ds.config", "testdata/small_tsv_ds/"
    )
    outputFilename = "out/genericGeneTest.out.tsv"

    annotator = Annotator()
    annotator.setInputCreator(MafliteInputMutationCreator("testdata/maflite/Patient0.snp.maf.txt"))
    annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
    annotator.addDatasource(gafDatasource)
    annotator.addDatasource(geneDS)
    annotator.annotate()

    # Check that there were actual annotations performed.
    tsvReader = GenericTsvReader(outputFilename)
    fields = tsvReader.getFieldNames()
    # The failure message now names the column that is actually checked.
    self.assertTrue(
        "CGC_Abridged_Other Syndrome/Disease" in fields,
        "'CGC_Abridged_Other Syndrome/Disease' was not present in the header",
    )
    self.assertTrue(
        "CGC_Abridged_Mutation Type" in fields, "'CGC_Abridged_Mutation Type' was not present in the header"
    )

    ctr = 1  # 1-based row number, used only in the failure message
    linesThatShouldBeAnnotated = 0
    for lineDict in tsvReader:
        self.assertTrue("gene" in lineDict.keys())
        if lineDict["gene"] in genesAvailable:
            self.assertTrue(
                lineDict["CGC_Abridged_GeneID"] != "",
                "'CGC_Abridged_GeneID' was missing on a row that should have been populated. Line: " + str(ctr),
            )
            linesThatShouldBeAnnotated += 1
        ctr += 1

    # Guard against a vacuous pass: at least one row must have been eligible.
    self.assertTrue((linesThatShouldBeAnnotated) > 0, "Bad data -- cannot test missed detects.")