本文整理匯總了Python中oncotator.utils.GenericTsvReader.GenericTsvReader.getCommentsAsList方法的典型用法代碼示例。如果您正苦於以下問題:Python GenericTsvReader.getCommentsAsList方法的具體用法?Python GenericTsvReader.getCommentsAsList怎麼用?Python GenericTsvReader.getCommentsAsList使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類oncotator.utils.GenericTsvReader.GenericTsvReader的用法示例。
在下文中一共展示了GenericTsvReader.getCommentsAsList方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: testTCGAMAFAsInputAndQuickAnnotate
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getCommentsAsList [as 別名]
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """ Test that we can take in a TCGA MAF (using MAFLITE), do annotating, and still render it properly """
    inputFilename = "testdata/maf/Patient0.maf.annotated"
    # Pass the config path by keyword: the second positional parameter of
    # MafliteInputMutationCreator.__init__ is mutation_data_factory, not the config file.
    tmp = MafliteInputMutationCreator(inputFilename, configFile='configs/maflite_input.config')
    outputFilename = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
    outputRenderer = TcgaMafOutputRenderer(outputFilename, 'configs/tcgaMAF2.4_output.config')

    annotator = Annotator()
    annotator.setInputCreator(tmp)
    annotator.setOutputRenderer(outputRenderer)
    ds = DatasourceFactory.createDatasource("testdata/thaga_janakari_gene_ds/hg19/tj_data.config", "testdata/thaga_janakari_gene_ds/hg19/")
    annotator.addDatasource(ds)
    annotator.annotate()

    # The rendered file must exist and be non-empty before it is parsed.
    statinfo = os.stat(outputFilename)
    self.assertGreater(statinfo.st_size, 0, "Generated MAF file (" + outputFilename + ") is empty.")

    tsvReaderIn = GenericTsvReader(inputFilename)
    tsvReader = GenericTsvReader(outputFilename)

    self.assertIn('#version', tsvReader.getComments(), "First line did not specify a version number")

    # The datasource added above should contribute these two columns to the header.
    outputFieldNames = tsvReader.getFieldNames()
    self.assertIn("i_TJ_Data_Why", outputFieldNames, "New field missing (i_TJ_Data_Why) from header")
    self.assertIn("i_TJ_Data_Who", outputFieldNames, "New field missing (i_TJ_Data_Who) from header")

    # Count data rows plus comment lines for each file.
    ctrOut = sum(1 for _ in tsvReader) + len(tsvReader.getCommentsAsList())
    ctrIn = sum(1 for _ in tsvReaderIn) + len(tsvReaderIn.getCommentsAsList())

    self.assertEqual(ctrOut, ctrIn + 2, "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file.  (In,Out): " + str(ctrIn) + ", " + str(ctrOut))
示例2: testAnnotationWithExampleVcf
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getCommentsAsList [as 別名]
def testAnnotationWithExampleVcf(self):
    """
    Tests whether parsed annotations match the actual annotations in a simple TSV.
    Original note on expected rendering: missing format fields yield --> "" and ".,." --> ","

    The example VCF is annotated without any datasources, rendered with the simple
    TSV renderer, and compared column by column against a stored expected file.
    """
    inputFilename = os.path.join("testdata", "vcf", "example.vcf")
    outputFilename = os.path.join("out", "example.out.tsv")
    expectedOutputFilename = os.path.join("testdata", "vcf", "example.expected.out.tsv")

    creator = VcfInputMutationCreator(inputFilename)
    creator.createMutations()
    renderer = SimpleOutputRenderer(outputFilename)
    annotator = Annotator()
    annotator.setInputCreator(creator)
    annotator.setOutputRenderer(renderer)
    annotator.annotate()

    # The rendered file starts with comment lines; the header row follows them.
    tsvReader = GenericTsvReader(outputFilename)
    current = pandas.read_csv(outputFilename, sep='\t', header=len(tsvReader.getCommentsAsList()))
    expected = pandas.read_csv(expectedOutputFilename, sep='\t')

    currentColNames = set(current.columns)
    expectedColNames = set(expected.columns)

    # Compare by value; the previous "len(...) is 0" relied on int object identity.
    self.assertEqual(currentColNames, expectedColNames, "Should have the same columns")
    self.assertEqual(len(current.index), len(expected.index), "Should have the same number of rows")

    for colName in currentColNames:
        sameValue = current[colName] == expected[colName]
        # NaN != NaN, so cells that are null on both sides are treated as matching.
        bothMissing = pandas.isnull(current[colName]) & pandas.isnull(expected[colName])
        self.assertTrue((sameValue | bothMissing).all(),
                        "Should have the same values in column " + colName + ": \n" +
                        str(current[colName]) + "\nvs\n" + str(expected[colName]))
示例3: testMissingFilter
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getCommentsAsList [as 別名]
def testMissingFilter(self):
    """
    Tests that the missing FILTER fields are parsed correctly.

    The VCF is annotated without any datasources, rendered with the simple TSV
    renderer, and compared column by column against a stored expected file.
    """
    inputFilename = os.path.join("testdata", "vcf", "example.missing_filters.vcf")
    outputFilename = os.path.join("out", "example.missing_filters.out.tsv")
    expectedOutputFilename = os.path.join("testdata", "vcf", "example.expected.missing_filters.out.tsv")

    creator = VcfInputMutationCreator(inputFilename)
    creator.createMutations()
    renderer = SimpleOutputRenderer(outputFilename)
    annotator = Annotator()
    annotator.setInputCreator(creator)
    annotator.setOutputRenderer(renderer)
    annotator.annotate()

    # The rendered file starts with comment lines; the header row follows them.
    tsvReader = GenericTsvReader(outputFilename)
    current = pandas.read_csv(outputFilename, sep='\t', header=len(tsvReader.getCommentsAsList()))
    expected = pandas.read_csv(expectedOutputFilename, sep='\t')

    currentColNames = set(current.columns)
    expectedColNames = set(expected.columns)

    # Compare by value; the previous "len(...) is 0" relied on int object identity.
    self.assertEqual(currentColNames, expectedColNames, "Should have the same columns")
    self.assertEqual(len(current.index), len(expected.index), "Should have the same number of rows")

    for colName in currentColNames:
        sameValue = current[colName] == expected[colName]
        # NaN != NaN, so cells that are null on both sides are treated as matching.
        bothMissing = pandas.isnull(current[colName]) & pandas.isnull(expected[colName])
        self.assertTrue((sameValue | bothMissing).all(), "Should have the same values in column " + colName)
示例4: MafliteInputMutationCreator
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getCommentsAsList [as 別名]
class MafliteInputMutationCreator(InputMutationCreator):
"""
A maflite file is a simple tsv file
See the config file maflite_input.config for aliases and required headers.
Additional columns can be included and will be annotate to the mutation using the header name.
IMPORTANT NOTE: maflite will look at all aliases for alt_allele (see maflite_input.config) and choose the first that does not match the ref_allele
"""
def __init__(self, filename, mutation_data_factory=None, configFile='maflite_input.config', genomeBuild="hg19", other_options=None):
    """
    Open the maflite file and verify that every required header is present.

    A required header counts as present when the file contains either the header
    itself or one of its configured alternatives.  The "build" header is optional.

    :param filename: path of the maflite (tsv) file to read
    :param mutation_data_factory: forwarded unchanged to the parent constructor
    :param configFile: config file providing the required headers and their alternatives
    :param genomeBuild: build stored on this instance and used when creating mutations
    :param other_options: forwarded unchanged to the parent constructor
    :raises MafliteMissingRequiredHeaderException: if any required header (other than
        "build") and all of its alternatives are absent from the file
    """
    super(MafliteInputMutationCreator, self).__init__(filename, mutation_data_factory, configFile, genomeBuild, other_options)

    self.logger = logging.getLogger(__name__)
    self.config = ConfigUtils.createConfigParser(configFile)
    self._tsvReader = GenericTsvReader(filename)

    # Required column name -> list of valid alternative headers.
    self._alternativeDict = ConfigUtils.buildAlternateKeyDictionaryFromConfig(self.config)
    # Lookup in the other direction (column name -> the key it is an alternative for).
    self._reverseAlternativeDict = ConfigUtils.buildReverseAlternativeDictionary(self._alternativeDict)

    requiredHeaders = sorted(self.config.get("general", "required_headers").split(","))
    self._build = genomeBuild

    self.logger.info("Initializing a maflite file with the following header: " + str(self._tsvReader.getFieldNames()))

    # The specified fields are those that were given in the input.
    self._specified_fields = self._tsvReader.getFieldNames()

    def _is_provided(header):
        # Present either under its own name or under any configured alternative.
        if header in self._specified_fields:
            return True
        return any(alias in self._specified_fields for alias in self._alternativeDict.get(header, []))

    # "build" is optional, so it is never reported as missing.
    absentHeaders = sorted(header for header in requiredHeaders
                           if header != "build" and not _is_provided(header))

    if absentHeaders:
        raise MafliteMissingRequiredHeaderException("Specified maflite file (" + filename + ") missing required headers: " + ",".join(absentHeaders)  )
def getComments(self):
    """Return the comments of the underlying file as a list, as reported by the TSV reader."""
    reader = self._tsvReader
    return reader.getCommentsAsList()
def getMetadata(self):
    """
    Build a Metadata object with one entry per column given in the input file.

    Column names found in the reverse alternative dictionary are replaced by the
    name they map to; every other column keeps its own name.  Each entry is an
    empty-valued Annotation with the datasource name "INPUT".

    :return: the populated Metadata instance
    """
    result = Metadata()
    for fieldName in self._specified_fields:
        # A single dict lookup replaces the membership test against .keys()
        # (a list scan on Python 2) followed by a second lookup.
        canonicalName = self._reverseAlternativeDict.get(fieldName, fieldName)
        result[canonicalName] = Annotation("", datasourceName="INPUT")
    return result
def _find_alt_allele_in_other_field(self, raw_line_dict, ref_allele):
    """
    Check all the possible alt allele columns and choose the one that does not match the reference allele.

    The candidate columns are the alternatives configured for "alt_allele", tried in
    their configured order.

    :param raw_line_dict: mapping of column name to raw value for one input line
    :param ref_allele: the reference allele of that line
    :return: the first candidate value that is non-empty (after stripping whitespace)
        and differs from ref_allele; ref_allele itself when no such value exists
    """
    for candidate_field in self._alternativeDict.get("alt_allele", []):
        candidate_value = raw_line_dict.get(candidate_field)
        if candidate_value is None:
            # Column absent, or present without a value (e.g. a short row):
            # nothing to strip, so move on instead of raising AttributeError.
            continue
        candidate_value = candidate_value.strip()  # remove any surrounding whitespace if present
        if candidate_value != "" and candidate_value != ref_allele:
            return candidate_value
    return ref_allele
def createMutations(self):
""" No inputs.
Returns a generator of mutations built from the specified maflite file. """
aliasKeys = self._reverseAlternativeDict.keys()
allColumns = self._specified_fields
for line in self._tsvReader:
# We only need to assign fields that are mutation attributes and have a different name in the maflite file.
mut = self._mutation_data_factory.create(build=self._build)
for col in allColumns:
# Three scenarios:
# 1) col is name of mutation data field -- simple createAnnotation
# 2) col name is an alias for a mutation data field -- do lookup then createAnnotation
# 3) col name is not an alias for a mutation data field -- simple createAnnotation
if col in aliasKeys:
realKey = self._reverseAlternativeDict[col]
self.logger.debug(realKey + " found from " + col)
val = line[col]
#.........這裏部分代碼省略.........
示例5: index
# 需要導入模塊: from oncotator.utils.GenericTsvReader import GenericTsvReader [as 別名]
# 或者: from oncotator.utils.GenericTsvReader.GenericTsvReader import getCommentsAsList [as 別名]
def index(destDir, inputFilename, fileColumnNumList=None, preset=None):
    """
    Create a tabix index file for genomic position datasource tsv files.

    Prerequisites (for genomic position indexed):
        Input file has three columns that can be mapped to chromosome, start position, and end position without any modification.
        For example, ['hg19.oreganno.chrom', 'hg19.oreganno.chromStart', 'hg19.oreganno.chromEnd'] in oreganno.hg19.txt

    This will overwrite an existing index (since the force parameter is set to True in pysam.tabix_index() call).
    Also, in cases where the inputFilename doesn't end with a ".gz", a compressed file will be created and indexed.
    If the gz and tbi files already exist, this will simply copy the files to the specified destination.

    :param destDir: destination directory
    :param fileColumnNumList: ordered list.  This list contains the corresponding entries (column numbers)
        in the tsv file. Typically, this would be [chr,start,end]  or [gene, startAA, endAA].
        Only read when preset is not one of the tabix presets; its first three entries are used.
    :param inputFilename: tsv file input
    :param preset: if preset is provided, the column coordinates are taken from a preset. Valid values for preset
        are "gff", "bed", "sam", "vcf", "psltbl", and "pileup".  "tsv" is also recognized, but this will use the tabix
        generic indexing (after commenting out the header line)
    :return: for a ".gz" input, the path of the copied file in destDir; otherwise the value
        returned by pysam.tabix_index()
    :raises TabixIndexerFileMissingError: if the input is a ".gz" file without a ".tbi" file next to it
    :raises OncotatorException: if pysam.tabix_index() returns None
    :raises IndexError: if generic indexing is used and fileColumnNumList has fewer than three entries
    """
    fileColumnNumList = [] if fileColumnNumList is None else fileColumnNumList
    inputFilename = os.path.abspath(inputFilename)
    fileName, fileExtension = os.path.splitext(os.path.basename(inputFilename))

    if fileExtension == ".gz":
        # Already compressed: ensure the .gz.tbi file is there as well, then copy both.
        inputIndexFilename = inputFilename + ".tbi"
        if not os.path.exists(inputIndexFilename):
            msg = "Missing tabix index file %s." % inputIndexFilename
            raise TabixIndexerFileMissingError(msg)

        outputFilename = os.path.join(destDir, fileName + ".gz")
        shutil.copyfile(inputFilename, outputFilename)
        shutil.copyfile(inputIndexFilename, outputFilename + ".tbi")
        return outputFilename

    outputFilename = os.path.join(destDir, fileName + ".tabix_indexed" + fileExtension)

    if preset in ("gff", "bed", "sam", "vcf", "psltbl", "pileup"):
        # Tabix understands these formats directly, so index an unmodified copy.
        shutil.copyfile(inputFilename, outputFilename)
        tabix_index = pysam.tabix_index(filename=outputFilename, force=True, preset=preset)
    else:
        # Need to comment out the header line with a "#", so we cannot simply copy the file.
        input_reader = GenericTsvReader(inputFilename)
        try:
            with open(outputFilename, 'w') as output_writer:
                # NOTE(review): assumes the comment entries keep their line terminators -- confirm.
                output_writer.writelines(input_reader.getCommentsAsList())

                # Add "#" for the header line.
                field_names = input_reader.getFieldNames()
                output_writer.write("#" + "\t".join(field_names) + "\n")

                # Write the rest of the file.
                # This might be too slow, since a raw reader would be pretty fast.
                for line_dict in input_reader:
                    output_writer.write("\t".join([line_dict[k] for k in field_names]) + "\n")
        finally:
            # Release the reader even when writing fails part-way through.
            input_reader.close()

        # The output file is closed (and therefore flushed) before it is indexed.
        tabix_index = pysam.tabix_index(filename=outputFilename, force=True, seq_col=fileColumnNumList[0],
                                        start_col=fileColumnNumList[1], end_col=fileColumnNumList[2])

    if tabix_index is None:
        raise OncotatorException("Could not create a tabix index from this input file: " + outputFilename)

    return tabix_index