本文整理汇总了Python中oncotator.utils.MutUtils.MutUtils.extractProteinPosition方法的典型用法代码示例。如果您正苦于以下问题:Python MutUtils.extractProteinPosition方法的具体用法?Python MutUtils.extractProteinPosition怎么用?Python MutUtils.extractProteinPosition使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类oncotator.utils.MutUtils.MutUtils的用法示例。
在下文中一共展示了MutUtils.extractProteinPosition方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testProteinChange
# 需要导入模块: from oncotator.utils.MutUtils import MutUtils [as 别名]
# 或者: from oncotator.utils.MutUtils.MutUtils import extractProteinPosition [as 别名]
def testProteinChange(self):
    """Test that protein change parsing of start and end works.

    Each case feeds a protein-change string to
    MutUtils.extractProteinPosition and checks the first two elements of
    the result against the expected [start, end] strings.  An input that
    is not a protein change ("blahblah") must yield ['', ''].
    """
    # Each tuple is (protein change string, ground truth [start, end]).
    testInOuts = [
        ("p.K128_R130del", ['128', '130']),
        ("p.W274G", ["274", "274"]),
        ("p.13_14AA>A", ["13", "14"]),
        ("p.G25_splice", ["25", "25"]),
        ("p.E813*", ["813", "813"]),
        ("p.SLPQPEQRPY59del", ["59", "59"]),
    ]
    for proteinChange, expected in testInOuts:
        result = MutUtils.extractProteinPosition(proteinChange)
        # Checked separately so an empty parse is reported as such rather
        # than as a generic mismatch.
        self.assertNotEqual(result, ['', ''],
                            "Result was empty. " + str(proteinChange) + ". ")
        self.assertEqual([result[0], result[1]], expected,
                         "Result did not match for " + str(proteinChange) + ". "
                         + str(result) + " GT: " + str(expected))
    self.assertEqual(MutUtils.extractProteinPosition("blahblah"), ['', ''])
示例2: indexGeneProteinPosition
# 需要导入模块: from oncotator.utils.MutUtils import MutUtils [as 别名]
# 或者: from oncotator.utils.MutUtils.MutUtils import extractProteinPosition [as 别名]
def indexGeneProteinPosition(geneColumn, proteinInfoColumn, inputFilename, outputFilename):
    """
    Creates an intermediate temporary file that includes two additional columns, startAA and endAA,
    sorts the file, writes the sorted file to outputFilename, and then indexes the sorted file.

    Rows whose protein value is missing or blank, or for which
    MutUtils.extractProteinPosition yields a blank start or end, are dropped.
    The sorted output is ordered by (lower-cased gene, int(startAA), int(endAA)).

    :param geneColumn: name of the gene column in the inputFilename
    :param proteinInfoColumn: name of the protein change or position column. Can be of formats: p.K128_R130del
        (position 128 through 130) For more examples, see MutUtilsTest.testProteinChange()
    :param inputFilename: input tsv filename
    :param outputFilename: output filename
    :return: whatever TabixIndexer.index returns for the sorted output file
    :raises ValueError: if geneColumn is not among the (cleaned) header names
    """
    startAACol = "startAA"
    endAACol = "endAA"

    # Do not use '#' for comments, since header can start with '#'
    tsvReader = GenericTsvReader(inputFilename, commentPrepend=";")

    # These are the output headers for the intermediate file.
    # NOTE(review): the list is extended and edited in place rather than
    # copied.  If getFieldNames() hands back the reader's own list, the reader
    # sees these edits too -- presumably relied upon; confirm before copying.
    headers = tsvReader.getFieldNames()
    for extraCol in (startAACol, endAACol):
        if extraCol not in headers:
            headers += [extraCol]

    # If a header has a leading '#', get rid of it.  Note that replace()
    # drops every '#' in such a header, not only the leading one.
    for i, header in enumerate(headers):
        if header.startswith("#"):
            headers[i] = header.replace("#", "")

    # Get indices of relevant columns up front, so an unknown column name
    # fails before any file is created.
    gene_i = headers.index(geneColumn)
    startAA_i = headers.index(startAACol)
    endAA_i = headers.index(endAACol)

    # NamedTemporaryFile places the intermediate file in the platform's
    # default temporary directory and deletes it when closed.
    temp = tempfile.NamedTemporaryFile()
    try:
        # Write the header and each usable row of the intermediate file.
        with open(temp.name, 'w') as csvfile:
            tsvWriter = csv.DictWriter(csvfile, headers, delimiter='\t', lineterminator='\n')
            tsvWriter.writeheader()
            for row in tsvReader:
                protein = row[proteinInfoColumn]
                if protein is None or not protein.strip():
                    continue
                startAA, endAA = MutUtils.extractProteinPosition(protein)
                if not startAA.strip() or not endAA.strip():
                    continue
                row[startAACol] = startAA
                row[endAACol] = endAA
                tsvWriter.writerow(row)

        def sortKey(val):
            # Sort on the same columns that are indexed below; the gene is
            # compared case-insensitively and positions numerically.
            return (val[geneColumn].lower(), int(val[startAACol]), int(val[endAACol]))

        # Sort the intermediate tsv file into the output, using the whole file path name.
        outputFilename = os.path.abspath(outputFilename)
        tsvSorter = TsvFileSorter(temp.name)
        tsvSorter.sortFile(outputFilename, sortKey)

        return TabixIndexer.index(destDir=os.path.dirname(outputFilename),
                                  inputFilename=outputFilename,
                                  fileColumnNumList=[gene_i, startAA_i, endAA_i])
    finally:
        # Remove the intermediate file even when sorting or indexing fails.
        temp.close()