本文整理匯總了Python中oncotator.utils.GenericTsvReader.GenericTsvReader.getComments方法的典型用法代碼示例。如果您正苦於以下問題:Python GenericTsvReader.getComments方法的具體用法?Python GenericTsvReader.getComments怎麼用?Python GenericTsvReader.getComments使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類oncotator.utils.GenericTsvReader.GenericTsvReader的用法示例。
在下文中一共展示了GenericTsvReader.getComments方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: testTCGAMAFAsInputAndQuickAnnotate
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getComments [as 別名]
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """Annotate a TCGA MAF read through MAFLITE and check the rendered MAF.

    The test verifies that the output file is non-empty, that its comment
    block contains ``#version``, that the two ``i_TJ_Data_*`` columns appear
    in the header, and that the output holds exactly two more lines
    (data rows plus comment lines) than the input.
    """
    inputFilename = "testdata/maf/Patient0.maf.annotated"
    outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"

    # Wire the input creator, output renderer and one datasource into the annotator.
    inputCreator = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
    renderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')
    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    geneDatasource = DatasourceFactory.createDatasource(
        "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
        "testdata/thaga_janakari_gene_ds/hg19/")
    annotator.addDatasource(geneDatasource)
    annotator.annotate()

    outputSize = os.stat(outputFilename).st_size
    self.assertTrue(outputSize > 0, "Generated MAF file (" + outputFilename + ") is empty.")

    inputReader = GenericTsvReader(inputFilename)
    outputReader = GenericTsvReader(outputFilename)

    versionOffset = outputReader.getComments().find('#version')
    self.assertTrue(versionOffset != -1, "First line did not specify a version number")
    self.assertTrue("i_TJ_Data_Why" in outputReader.getFieldNames(), "New field missing (i_TJ_Data_Why) from header")
    self.assertTrue("i_TJ_Data_Who" in outputReader.getFieldNames(), "New field missing (i_TJ_Data_Who) from header")

    # Count data rows first (output, then input), then add the comment lines.
    outputRowCount = sum(1 for _ in outputReader)
    inputRowCount = sum(1 for _ in inputReader)
    inputLineCount = inputRowCount + len(inputReader.getCommentsAsList())
    outputLineCount = outputRowCount + len(outputReader.getCommentsAsList())

    self.assertTrue(outputLineCount == (inputLineCount + 2), "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file. (In,Out): " + str(inputLineCount) + ", " + str(outputLineCount))
示例2: sortFile
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getComments [as 別名]
def sortFile(self, filename, func, length=50000):
    """
    Sort the file at self.readfilename and write the sorted result to filename.

    The comment block and the header line obtained from GenericTsvReader are
    written first, followed by the merged output of the sorted partitions.

    :param filename: sorted filename
    :param func: function that converts each row of the input file to an unique, sortable key
    :param length: maximum number of lines in a partition
    """
    reader = GenericTsvReader(filename=self.readfilename, commentPrepend=self.commentPrepend,
                              delimiter=self.delimiter)
    comments = reader.getComments()

    # A reader that reports no header is treated as having zero columns.
    fieldnames = reader.getFieldNames()
    if fieldnames is None:
        fieldnames = []

    # Map each column name to its position; empty when there are no columns.
    fieldnameIndexes = collections.OrderedDict((name, idx) for (idx, name) in enumerate(fieldnames))

    iterable = iter(reader.getInputContentFP())
    partitions = self._yieldPartitions(iterable, func, fieldnameIndexes, length)

    # Positional arguments (path, mode, buffering) avoid the Python-2-only ``name=`` keyword.
    # NOTE(review): binary mode with str payloads relies on Python 2 semantics - confirm before porting.
    with open(filename, 'wb', 64 * 1024) as writer:
        writer.write(comments)
        writer.write(self.delimiter.join(fieldnames) + "\n")
        writer.writelines(self._merge(partitions))  # generators are allowed as inputs to writelines function
示例3: _validateTcgaMafContents
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getComments [as 別名]
def _validateTcgaMafContents(self, filename):
    """ This is a utility, private method for unit tests to get a semblance that a valid maf file was created.

    Note: This method has nothing to do with the TCGA validator.

    For every row the method asserts that the two tumor alleles differ, that
    Tumor_Seq_Allele1 equals Reference_Allele, that no value contains a carriage
    return or equals "__UNKNOWN__", that columns not named in the config start
    with "i_", and that deletions/insertions use "-" for the missing allele.

    :param filename: path of the rendered MAF file to check
    """
    configFile = ConfigUtils.createConfigParser(os.path.join("configs", "tcgaMAF2.4_output.config"))
    statinfo = os.stat(filename)
    self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + filename + ") is empty.")

    tsvReader = GenericTsvReader(filename)
    self.assertTrue(tsvReader.getComments().find('#version') != -1, "First line did not specify a version number")

    # The column lists are the same for every row and key, so read them once
    # instead of once per key per row.
    # NOTE(review): these look like raw config strings, which would make the
    # ``in`` checks below substring matches - confirm against the config format.
    requiredColumns = configFile.get("general", "requiredColumns")
    optionalColumns = configFile.get("general", "optionalColumns")
    exposedColumns = configFile.get("general", "exposedColumns")

    ctr = 1
    for lineDict in tsvReader:

        # TODO: Re-enable when GENCODE and HGNC datasources are concordant (or Entrez_Gene_ID is in the gencode gtf)
        # if lineDict['Entrez_Gene_Id'] == "0":
        # self.assertTrue(lineDict['Hugo_Symbol'] == "Unknown", "Entrez_Gene_Id was zero, but Hugo Symbol was not 'Unknown'. Line: " + str(ctr))

        unknownKeys = []
        self.assertTrue(lineDict["Tumor_Seq_Allele1"] != lineDict["Tumor_Seq_Allele2"], "Reference and alternate were equal in TCGA MAF output on line %d (%s)" % (ctr, lineDict["Tumor_Seq_Allele1"]))
        self.assertTrue(lineDict["Tumor_Seq_Allele1"] == lineDict["Reference_Allele"], "Reference Allele should match Tumor_Seq_Allele1 on line " + str(ctr))

        # NOTE(review): this counter is reset for every row, so it never exceeds 1
        # and the "< 10" assertion below cannot fail - confirm whether it was
        # meant to accumulate across rows.
        uniprot_aa_xform_counter = 0

        for k, value in lineDict.items():
            if value == "__UNKNOWN__":
                unknownKeys.append(k)

            self.assertTrue('\r' not in value, "Carriage return character found in an annotation value.")

            if (k not in requiredColumns) and (k not in optionalColumns) and (k not in exposedColumns):
                self.assertTrue(k.startswith("i_"), "Internal column was not prepended with 'i_'")

        if lineDict['UniProt_AApos'] == "0":
            uniprot_aa_xform_counter += 1

        if lineDict["Variant_Type"] == VariantClassification.VT_DEL:
            self.assertTrue(lineDict["Tumor_Seq_Allele2"] == "-")

        if lineDict["Variant_Type"] == VariantClassification.VT_INS:
            self.assertTrue(lineDict["Reference_Allele"] == "-")

        unknownKeys.sort()
        self.assertTrue(len(unknownKeys) == 0, "__UNKNOWN__ values (" + str(len(unknownKeys)) + ") seen on line " + str(ctr) + ", in fields: " + ", ".join(unknownKeys))
        self.assertTrue(uniprot_aa_xform_counter < 10, "Too many uniprot aa xform values are zero (" + str(uniprot_aa_xform_counter) + "). This is probably an error.")

        ctr += 1
示例4: _validateTcgaMafContents
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getComments [as 別名]
def _validateTcgaMafContents(self, filename):
    """
    This is a utility, private method for unit tests to get a semblance that a valid maf file was created.

    Note: This method has nothing to do with the TCGA validator.

    For every row the method asserts that a zero Entrez_Gene_Id goes with an
    "Unknown" Hugo_Symbol, that no value contains a carriage return or equals
    "__UNKNOWN__", and that columns not named in the config start with "i_".

    TODO: This is code duplication from TCGA MAF Output RendererTest. This should be refactored into a base class
    (to preserve self.assertTrue, etc).

    :param filename: path of the rendered MAF file to check
    """
    statinfo = os.stat(filename)
    self.assertTrue(statinfo.st_size > 0, "Generated MAF file (" + filename + ") is empty.")

    tsvReader = GenericTsvReader(filename)
    # ``!=`` replaces the ``<>`` operator, which Python 3 no longer accepts.
    self.assertTrue(tsvReader.getComments().find('#version') != -1, "First line did not specify a version number")

    # Parse the config and read the column lists once; they are identical for
    # every key of every row.
    # NOTE(review): these look like raw config strings, which would make the
    # ``in`` checks below substring matches - confirm against the config format.
    configFile = ConfigUtils.createConfigParser('configs/tcgaMAF2.3_output.config')
    requiredColumns = configFile.get("general", "requiredColumns")
    optionalColumns = configFile.get("general", "optionalColumns")

    ctr = 1
    for lineDict in tsvReader:
        if lineDict['Entrez_Gene_Id'] == "0":
            self.assertTrue(lineDict['Hugo_Symbol'] == "Unknown",
                            "Entrez_Gene_Id was zero, but Hugo Symbol was not 'Unknown'. Line: " + str(ctr))

        unknownKeys = []
        for k, value in lineDict.items():
            if value == "__UNKNOWN__":
                unknownKeys.append(k)

            self.assertTrue('\r' not in value, "Carriage return character found in an annotation value.")

            if (k not in requiredColumns) and (k not in optionalColumns):
                self.assertTrue(k.startswith("i_"), "Internal column was not prepended with 'i_'")

        unknownKeys.sort()
        self.assertTrue(len(unknownKeys) == 0,
                        "__UNKNOWN__ values (" + str(len(unknownKeys)) + ") seen on line " + str(
                            ctr) + ", in fields: " + ", ".join(unknownKeys))
        ctr += 1