This article collects typical usage examples of the getTempFile function from the Python module sonLib.bioio. If you are unsure what getTempFile does or how to call it, the curated examples below should help.
The following shows 15 code examples of getTempFile, ordered roughly by popularity.
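Before the examples, a minimal usage sketch may help orient readers. It assumes the usual sonLib.bioio behaviour, namely that getTempFile(suffix, rootDir) creates an empty temporary file and returns its path, leaving cleanup to the caller; check your sonLib version if the signature differs.

import os
from sonLib.bioio import getTempFile, getTempDirectory

# Assumed behaviour: getTempFile creates an empty file and returns its path;
# it does not delete the file for you.
workDir = getTempDirectory(os.getcwd())
tempPath = getTempFile(rootDir=workDir)   # e.g. getTempFile(".bed", rootDir=workDir) to request a suffix
try:
    fileHandle = open(tempPath, 'w')
    fileHandle.write("intermediate data\n")
    fileHandle.close()
finally:
    os.remove(tempPath)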
Example 1: testCactusRealignSplitSequences
def testCactusRealignSplitSequences(self):
    """Runs cactus realign, splitting indels longer than 100bp, and checks
    that the coverage from the results is the same as the coverage from
    realigning with no arguments."""
    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments)
        realignOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=realignOutput,
                         realignArguments=self.defaultRealignArguments)
        splitRealignOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=splitRealignOutput,
                         realignArguments=self.defaultRealignArguments + " --splitIndelsLongerThanThis 100")
        # Check coverage on seqFile1
        splitRealignCoverage = runCactusCoverage(seqFile1, splitRealignOutput)
        realignCoverage = runCactusCoverage(seqFile1, realignOutput)
        self.assertTrue(splitRealignCoverage == realignCoverage)
        # Check coverage on seqFile2
        splitRealignCoverage = runCactusCoverage(seqFile2, splitRealignOutput)
        realignCoverage = runCactusCoverage(seqFile2, realignOutput)
        self.assertTrue(splitRealignCoverage == realignCoverage)
        os.remove(realignOutput)
        os.remove(splitRealignOutput)
Example 2: down
def down(target, inputFile, fileStart, fileEnd, N, outputFile):
    """Input is a file, a range into that file to sort, and an output location in which
    to write the sorted file.
    If the range is larger than a threshold N, the range is divided recursively and
    a follow-on job is created which merges back the results; otherwise
    the range is sorted and placed in the output.
    """
    if random.random() > 0.5:
        raise RuntimeError() #This error is a test error, it does not mean the tests have failed.
    length = fileEnd - fileStart
    target.logToMaster("Am running a down target with length: %i from input file: %s" % (length, inputFile))
    assert length >= 0
    if length > N:
        midPoint = getMidPoint(inputFile, fileStart, fileEnd)
        assert midPoint >= fileStart
        assert midPoint+1 < fileEnd
        #We will subdivide the file
        tempFile1 = getTempFile(rootDir=target.getGlobalTempDir())
        tempFile2 = getTempFile(rootDir=target.getGlobalTempDir())
        target.addChildTargetFn(down, (inputFile, fileStart, midPoint+1, N, tempFile1))
        target.addChildTargetFn(down, (inputFile, midPoint+1, fileEnd, N, tempFile2)) #Add one to avoid the newline
        target.setFollowOnTargetFn(up, (tempFile1, tempFile2, outputFile))
    else:
        #We can sort this bit of the file
        copySubRangeOfFile(inputFile, fileStart, fileEnd, outputFile)
        sort(outputFile)
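The up job referenced above is not part of this example. As rough orientation, a follow-on target of this shape could merge the two sorted halves back together, assuming a merge(file1, file2, outputFile) helper like the one exercised in Example 13; this is an illustrative sketch, not the project's actual up().

def up(target, tempFile1, tempFile2, outputFile):
    """Illustrative follow-on target: merge the two sorted halves written by the
    child down() jobs into outputFile, then discard the intermediates.
    Sketch only; the real up() in the sort example may differ."""
    target.logToMaster("Merging %s and %s into %s" % (tempFile1, tempFile2, outputFile))
    merge(tempFile1, tempFile2, outputFile)  # assumed two-way merge of sorted line files
    os.remove(tempFile1)
    os.remove(tempFile2)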
Example 3: testKeepingCoverageOnIngroups
def testKeepingCoverageOnIngroups(self):
    """Tests whether the --ingroupCoverageDir option works as
    advertised."""
    encodeRegion = "ENm001"
    ingroups = ["human", "cow"]
    outgroups = ["macaque", "rabbit", "dog"]
    regionPath = os.path.join(self.encodePath, encodeRegion)
    ingroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), ingroups)
    outgroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), outgroups)
    # Run blast in "ingroup vs outgroups" mode, requesting to keep
    # the bed files that show outgroup coverage on the ingroup.
    toilDir = os.path.join(self.tempDir, "tmp_toil")
    outgroupFragmentPaths = [getTempFile(rootDir=self.tempDir) for outgroup in outgroups]
    ingroupCoveragePaths = [getTempFile(rootDir=self.tempDir) for ingroup in ingroups]
    runCactusBlastIngroupsAndOutgroups(ingroups=ingroupPaths, outgroups=outgroupPaths, alignmentsFile=self.tempOutputFile, outgroupFragmentPaths=outgroupFragmentPaths, ingroupCoveragePaths=ingroupCoveragePaths, toilDir=toilDir)
    for i, ingroupPath in enumerate(ingroupPaths):
        # Get the coverage from the outgroups independently and
        # check that it's the same as the file in
        # ingroupCoverageDir
        otherIngroupPath = ingroupPaths[1] if i == 0 else ingroupPaths[0]
        # To filter out alignments from the other ingroup and
        # self-alignments we need to create a fasta with all the
        # outgroup fragments in it.
        outgroupsCombined = getTempFile(rootDir=self.tempDir)
        for outgroupFragmentPath in outgroupFragmentPaths:
            system("cat %s >> %s" % (outgroupFragmentPath, outgroupsCombined))
        independentCoverageFile = getTempFile(rootDir=self.tempDir)
        calculateCoverage(fromGenome=outgroupsCombined, sequenceFile=ingroupPath, cigarFile=self.tempOutputFile, outputFile=independentCoverageFile)
        # find the coverage file cactus_blast kept (should be
        # named according to the basename of the ingroup path
        # file)
        keptCoverageFile = ingroupCoveragePaths[i]
        self.assertTrue(filecmp.cmp(independentCoverageFile, keptCoverageFile))
Example 4: testCPecanRealignSplitSequences
def testCPecanRealignSplitSequences(self):
    """Runs cPecanRealign, splitting indels longer than 100bp, and checks
    that the coverage from the results is the same as the coverage from
    realigning with no arguments."""
    for seqFile1, seqFile2 in seqFilePairGenerator():
        # Drop the lastz command since it's not needed, but it is
        # still convenient to use the same parameters as all
        # the other tests.
        realignCommand, _ = getCommands(seqFile1, seqFile2)
        splitRealignCommand = realignCommand + " --splitIndelsLongerThanThis 100"
        realignOutput = getTempFile()
        splitRealignOutput = getTempFile()
        realignCommand += " > %s" % realignOutput
        splitRealignCommand += " > %s" % splitRealignOutput
        system(realignCommand)
        system(splitRealignCommand)
        # Check coverage on seqFile1
        #The following will fail until we refactor.
        splitRealignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile1, splitRealignOutput))
        realignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile1, realignOutput))
        self.assertTrue(splitRealignCoverage == realignCoverage)
        # Check coverage on seqFile2
        splitRealignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile2, splitRealignOutput))
        realignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile2, realignOutput))
        self.assertTrue(splitRealignCoverage == realignCoverage)
        os.remove(realignOutput)
        os.remove(splitRealignOutput)
Example 5: testBlossom
def testBlossom(self):
    """Tests the blossom5 program using randGraph.py input.
    """
    for test in xrange(self.testNo):
        tempInputFile = getTempFile()
        tempOutputFile = getTempFile()
        self.tempFiles.append(tempInputFile)
        self.tempFiles.append(tempOutputFile)
        # Create sample/test input graph file
        system("blossom_randGraph.py > %s" % tempInputFile)
        # Run blossom5
        system("blossom5 -e %s -w %s >& /dev/null" % (tempInputFile, tempOutputFile))
        # Now check if output is valid
        f = open(tempOutputFile, 'r')
        lineIdx = 0
        for line in f:
            line = line.rstrip()
            if lineIdx == 0:
                (vertexNum, edgeNum) = line.split()
                vertexNum = int(vertexNum)
                edgeNum = int(edgeNum)
                vertexArray = [0] * vertexNum
                # Number of vertices must be even
                self.assertEqual(vertexNum % 2, 0)
                # Number of edges is half the number of vertices
                self.assertEqual(vertexNum/2, edgeNum)
            else:
                (vertexI, vertexJ,) = line.split()
                vertexI = int(vertexI)
                vertexJ = int(vertexJ)
                vertexArray[vertexI] += 1
                vertexArray[vertexJ] += 1
                # Vertex indices must satisfy 0 <= i, j < V
                self.assertTrue(vertexI in xrange(vertexNum))
                self.assertTrue(vertexJ in xrange(vertexNum))
            lineIdx += 1
        # Must have the correct number of edges
        self.assertEqual(edgeNum, lineIdx-1)
        badCount = 0
        for i in vertexArray:
            if i != 1:
                badCount += 1
        # Each vertex must appear in exactly one edge
        self.assertEqual(badCount, 0)
    logger.info("Ran the test(s) of the blossom program okay")
Example 6: testAddingOutgroupsImprovesResult
def testAddingOutgroupsImprovesResult(self):
    """Run blast on "ingroup" and "outgroup" encode regions, and ensure
    that adding an extra outgroup only adds alignments if
    possible, and doesn't lose any.
    """
    encodeRegion = "ENm001"
    ingroups = ["human", "macaque"]
    outgroups = ["rabbit", "dog", "rat", "platypus", "xenopus", "fugu"]
    MAX_NUM_OUTGROUPS = 3
    # subselect a random set of outgroups in the same order
    outgroups = [outgroups[i] for i in sorted(random.sample(xrange(len(outgroups)), MAX_NUM_OUTGROUPS))]
    regionPath = os.path.join(self.encodePath, encodeRegion)
    ingroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), ingroups)
    outgroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), outgroups)
    results = []
    for numOutgroups in xrange(1, len(outgroups) + 1):
        # Align w/ increasing numbers of outgroups
        subResults = getTempFile()
        subOutgroupPaths = outgroupPaths[:numOutgroups]
        print "aligning %s vs %s" % (",".join(ingroupPaths), ",".join(subOutgroupPaths))
        tmpToil = os.path.join(self.tempDir, "outgroupToil")
        runCactusBlastIngroupsAndOutgroups(ingroupPaths, subOutgroupPaths, alignmentsFile=subResults, toilDir=tmpToil)
        results.append(subResults)
    # Print diagnostics about coverage
    for i, subResults in enumerate(results):
        for ingroup, ingroupPath in zip(ingroups, ingroupPaths):
            ingroupCoverage = getTempFile(rootDir=self.tempDir)
            calculateCoverage(sequenceFile=ingroupPath, cigarFile=subResults, outputFile=ingroupCoverage)
            coveredBases = popenCatch("cat %s | awk '{ total += $3 - $2 } END { print total }'" % ingroupCoverage)
            print "covered bases on %s using %d outgroups: %s" % (ingroup, i + 1, coveredBases)
    resultsSets = map(lambda x : loadResults(x), results)
    for i, moreOutgroupsResults in enumerate(resultsSets[1:]):
        # Make sure the results from (n+1) outgroups are
        # (very nearly) a superset of the results from n outgroups
        print "Using %d addl outgroup(s):" % (i + 1)
        comparator = ResultComparator(resultsSets[0], moreOutgroupsResults)
        print comparator
        self.assertTrue(comparator.sensitivity >= 0.99)
    # Ensure that the new alignments don't cover more than
    # x% of already existing alignments to human
    for i in xrange(1, len(resultsSets)):
        prevResults = resultsSets[i-1][0]
        curResults = resultsSets[i][0]
        prevResultsHumanPos = set(map(lambda x: (x[0], x[1]) if "human" in x[0] else (x[2], x[3]), filter(lambda x: "human" in x[0] or "human" in x[2], prevResults)))
        newAlignments = curResults.difference(prevResults)
        newAlignmentsHumanPos = set(map(lambda x: (x[0], x[1]) if "human" in x[0] else (x[2], x[3]), filter(lambda x: "human" in x[0] or "human" in x[2], newAlignments)))
        print "addl outgroup %d:" % i
        print "bases re-covered: %f (%d)" % (len(newAlignmentsHumanPos.intersection(prevResultsHumanPos))/float(len(prevResultsHumanPos)), len(newAlignmentsHumanPos.intersection(prevResultsHumanPos)))
    for subResult in results:
        os.remove(subResult)
Example 7: liftover
def liftover(self, bedLine):
    """Lift a bedLine over to the target genome, parse the PSL output, and
    return a map from target sequence -> [(query block, [target
    block(s)])]
    Blocks are (start, end, strand) where start < end
    """
    tempSrc = getTempFile("ContiguousRegions.tempSrc.bed",
                          rootDir=self.tempRoot)
    tempDest = getTempFile("ContiguousRegions.tempDest.psl",
                           rootDir=self.tempRoot)
    open(tempSrc, 'w').write("%s\n" % bedLine)
    cmd = "halLiftover --outPSL %s %s %s %s %s" % (self.alignment,
                                                   self.srcGenome,
                                                   tempSrc,
                                                   self.destGenome,
                                                   tempDest)
    bioio.system(cmd)
    pslLines = open(tempDest).read().split("\n")
    os.remove(tempSrc)
    os.remove(tempDest)
    pslLines = map(lambda x: x.split(), pslLines)
    # Get target blocks for every query block. All adjacencies
    # within a block are by definition preserved. Adjacencies
    # between target blocks (and query blocks with the commandline
    # option) are what determine if the structure is preserved.
    # dict is to keep blocks separated by target sequence & strand
    blocks = defaultdict(list)
    for pslLine in pslLines:
        if pslLine == []:
            continue
        qStrand = pslLine[8][0]
        assert(qStrand == '+')
        if len(pslLine[8]) != 1:
            assert(len(pslLine[8]) == 2)
            tStrand = pslLine[8][1]
        else:
            tStrand = '+'
        tName = pslLine[13]
        tSize = int(pslLine[14])
        blockSizes = [int(i) for i in pslLine[18].split(",") if i != '']
        qStarts = [int(i) for i in pslLine[19].split(",") if i != '']
        tStarts = [int(i) for i in pslLine[20].split(",") if i != '']
        assert(len(blockSizes) == len(qStarts) and
               len(qStarts) == len(tStarts))
        for blockLen, qStart, tStart in zip(blockSizes, qStarts, tStarts):
            qBlock = (qStart, qStart + blockLen, qStrand)
            tBlock = (tStart, tStart + blockLen, tStrand) if tStrand == '+' else (tSize - tStart - blockLen, tSize - tStart, tStrand)
            blocks[tName].append((qBlock, tBlock))
    # Sort & merge query blocks in cases of duplication
    return self.mergeBlocks(blocks)
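mergeBlocks itself is not shown here. Going only by the docstring above, its job is to turn the per-target lists of (query block, target block) pairs into a map from target sequence to [(query block, [target block(s)])]. A hedged sketch of that grouping step, ignoring any merging of adjacent or duplicated blocks the real method may do, could look like this:

def mergeBlocks(self, blocks):
    """Illustrative sketch of the grouping described in the liftover docstring:
    per target sequence, collect all target blocks that map to the same query
    block, returning the pairs sorted by query block. The real mergeBlocks may
    also merge duplicated or adjacent blocks."""
    merged = {}
    for tName, pairs in blocks.items():
        grouped = defaultdict(list)
        for qBlock, tBlock in pairs:
            grouped[qBlock].append(tBlock)
        merged[tName] = sorted(grouped.items())
    return merged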
Example 8: testRepeatBed
def testRepeatBed(self):
    tempFile = getTempFile(rootDir=os.getcwd())
    tempFile2 = getTempFile(rootDir=os.getcwd())
    fileHandle = open(tempFile, 'w')
    fileHandle.write(">hello boo\nacTGACCCCgtcgAAcAAccc\n>foo\nAaaAAAAAAA")
    fileHandle.close()
    system("getRepeatBed %s %s" % (tempFile, tempFile2))
    fileHandle = open(tempFile2, 'r')
    fn = lambda (i, j, k) : (i, int(j), int(k))
    j = [ fn(i.split()) for i in fileHandle.readlines() ]
    print j
    assert j == [ ("hello", 0, 2), ("hello", 9, 13), ("hello", 15, 16), ("hello", 18, 21), ("foo", 1, 3) ]
    os.remove(tempFile)
    os.remove(tempFile2)
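From the assertion above, getRepeatBed evidently reports 0-based, half-open BED intervals covering the lowercase (soft-masked) bases of each sequence, keyed by the first word of the FASTA header. The following pure-Python illustration of that behaviour is not the actual getRepeatBed implementation, but it reproduces the expected output for the test input.

def repeatBedIntervals(fastaPath):
    """Illustrative equivalent of what the test expects getRepeatBed to do:
    emit (name, start, end) for every run of lowercase bases, using 0-based,
    half-open coordinates and the first word of the FASTA header as the name."""
    intervals = []
    name, seq = None, []
    def flush():
        if name is None:
            return
        s = "".join(seq)
        start = None
        for i, c in enumerate(s):
            if c.islower() and start is None:
                start = i
            elif not c.islower() and start is not None:
                intervals.append((name, start, i))
                start = None
        if start is not None:
            intervals.append((name, start, len(s)))
    for line in open(fastaPath):
        line = line.strip()
        if line.startswith(">"):
            flush()
            name, seq = line[1:].split()[0], []
        else:
            seq.append(line)
    flush()
    return intervals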
Example 9: testCopySubRangeOfFile
def testCopySubRangeOfFile(self):
    for test in xrange(self.testNo):
        tempDir = getTempDirectory(os.getcwd())
        tempFile = getTempFile(rootDir=tempDir)
        outputFile = getTempFile(rootDir=tempDir)
        makeFileToSort(tempFile)
        fileSize = os.path.getsize(tempFile)
        assert fileSize > 0
        fileStart = random.choice(xrange(0, fileSize))
        fileEnd = random.choice(xrange(fileStart, fileSize))
        copySubRangeOfFile(tempFile, fileStart, fileEnd, outputFile)
        l = open(outputFile, 'r').read()
        l2 = open(tempFile, 'r').read()[fileStart:fileEnd]
        checkEqual(l, l2)
        system("rm -rf %s" % tempDir)
Example 10: wrap
def wrap(self):
    # Pretty much ripped from the toil worker.py setup.
    tempPath = getTempFile()
    oldStdout = os.dup(1)
    oldStderr = os.dup(2)
    #Open the file to send stdout/stderr to.
    logFh = os.open(tempPath, os.O_RDWR | os.O_CREAT | os.O_APPEND)
    #Replace standard output with a descriptor for the log file
    os.dup2(logFh, 1)
    #Replace standard error with a descriptor for the log file
    os.dup2(logFh, 2)
    try:
        fn(self)
    except:
        oldStdoutFile = os.fdopen(oldStdout, 'w')
        logFile = os.fdopen(os.dup(logFh))
        logFile.seek(0)
        oldStdoutFile.write(logFile.read())
        raise
    finally:
        # Close the descriptor we used to open the file
        os.close(logFh)
        # Reset stdout and stderr
        os.dup2(oldStdout, 1)
        os.dup2(oldStderr, 2)
        os.remove(tempPath)
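Since wrap() closes over fn, it is presumably the inner function of a decorator that silences a test method's output unless it fails. A self-contained sketch of such a decorator follows; the name silenceOutput and its exact placement are assumptions for illustration, not the project's own API.

import os
from sonLib.bioio import getTempFile

def silenceOutput(fn):
    """Hypothetical decorator built around the wrap() pattern above: run a test
    method with stdout/stderr redirected to a temp file, and replay the
    captured output only if the method raises."""
    def wrap(self):
        tempPath = getTempFile()
        oldStdout = os.dup(1)
        oldStderr = os.dup(2)
        logFh = os.open(tempPath, os.O_RDWR | os.O_CREAT | os.O_APPEND)
        os.dup2(logFh, 1)
        os.dup2(logFh, 2)
        try:
            fn(self)
        except:
            oldStdoutFile = os.fdopen(oldStdout, 'w')
            logFile = os.fdopen(os.dup(logFh))
            logFile.seek(0)
            oldStdoutFile.write(logFile.read())
            raise
        finally:
            os.close(logFh)
            os.dup2(oldStdout, 1)
            os.dup2(oldStderr, 2)
            os.remove(tempPath)
    return wrap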
Example 11: getFastaDict
def getFastaDict(self):
    temp = getTempFile(rootDir=self.getGlobalTempDir())
    system("hal2fasta %s %s > %s" % (self.halPath, self.genome, temp))
    ret = {}
    for header, seq in fastaRead(temp):
        ret[header] = seq
    return ret
Example 12: run
def run(self):
    speciesTree = popenCatch("halStats --tree %s" % (self.opts.halFile)).strip()
    chromSizes = getChromSizes(self.opts.halFile, self.opts.refGenome)
    positions = []
    # For ensuring that a column isn't counted multiple times from
    # different reference positions.
    positionSet = set(positions)
    for i in xrange(self.opts.numSamples):
        # Have to sample the columns here since otherwise it can
        # be difficult to independently seed several RNGs
        pos = samplePosition(chromSizes)
        if pos not in positionSet:
            positions.append(pos)
            positionSet.add(pos)
    outputs = []
    for sliceStart in xrange(0, self.opts.numSamples,
                             self.opts.samplesPerJob):
        slice = positions[sliceStart:sliceStart + self.opts.samplesPerJob]
        outputFile = getTempFile(rootDir=self.getGlobalTempDir())
        outputs.append(outputFile)
        self.addChildTarget(ScoreColumns(self.opts, slice,
                                         outputFile, speciesTree, positionSet))
    self.setFollowOnTarget(Summarize(self.opts, outputs, self.opts.outputFile, self.opts.writeMismatchesToFile))
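samplePosition is not shown in this example. Assuming chromSizes maps chromosome name to length, a sampler with the behaviour the code above relies on, a uniformly random reference position with chromosomes weighted by size, could be sketched as:

import random

def samplePositionSketch(chromSizes):
    """Illustrative stand-in for the samplePosition helper used above: draw a
    uniformly random (chromosome, offset) pair, weighting chromosomes by their
    length. The real helper may differ."""
    total = sum(chromSizes.values())
    offset = random.randint(0, total - 1)
    for chrom, size in sorted(chromSizes.items()):
        if offset < size:
            return (chrom, offset)
        offset -= size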
Example 13: testMerge
def testMerge(self):
    for test in xrange(self.testNo):
        tempDir = getTempDirectory(os.getcwd())
        tempFile1 = getTempFile(rootDir=tempDir)
        tempFile2 = getTempFile(rootDir=tempDir)
        tempFile3 = getTempFile(rootDir=tempDir)
        makeFileToSort(tempFile1)
        makeFileToSort(tempFile2)
        sort(tempFile1)
        sort(tempFile2)
        merge(tempFile1, tempFile2, tempFile3)
        lines1 = loadFile(tempFile1) + loadFile(tempFile2)
        lines1.sort()
        lines2 = loadFile(tempFile3)
        checkEqual(lines1, lines2)
        system("rm -rf %s" % tempDir)
Example 14: killMasterAndParasol
def killMasterAndParasol():
    """Method to destroy master process
    """
    tempFile = getTempFile()
    popen("ps -a", tempFile)
    fileHandle = open(tempFile, 'r')
    line = fileHandle.readline()
    #Example parasol state lines:
    #67401 ttys002 0:00.06 /Users/benedictpaten/kent/src/parasol/bin/paraNode start -hub=localhost -log=/tmp/node.2009-07-08.log -umask=002 -userPath=bin:bin/x86_64:bin/i
    #67403 ttys002 0:00.65 /Users/benedictpaten/kent/src/parasol/bin/paraHub -log=/tmp/hub.2009-07-08.log machineList subnet=127.0.0
    #68573 ttys002 0:00.00 /Users/benedictpaten/kent/src/parasol/bin/paraNode start -hub=localhost -log=/tmp/node.2009-07-08.log -umask=002 -userPath=bin:bin/x86_64:bin/i
    while line != '':
        tokens = line.split()
        if 'paraNode' in line or 'paraHub' in line:
            if random.random() > 0.5:
                i = os.system("kill %i" % int(tokens[0]))
                logger.info("Tried to kill parasol process: %i, line: %s, exit value: %i" % (int(tokens[0]), line, i))
                break
        elif 'jobTreeMaster.py' in line:
            logger.info("Have job tree master line")
            if random.random() > 0.5:
                i = os.system("kill %i" % int(tokens[0]))
                logger.info("Tried to kill master process: %i, line: %s, exit value: %i" % (int(tokens[0]), line, i))
                break
        line = fileHandle.readline()
    fileHandle.close()
    os.remove(tempFile)
    parasolRestart()
Example 15: scriptTree_SortTest
def scriptTree_SortTest(testNo, batchSystem, lines=10000, maxLineLength=10, N=10000):
    """Tests scriptTree/jobTree by sorting a file in parallel.
    """
    for test in xrange(testNo):
        tempDir = getTempDirectory(os.getcwd())
        tempFile = getTempFile(rootDir=tempDir)
        jobTreeDir = os.path.join(tempDir, "testJobTree")
        makeFileToSort(tempFile, lines=lines, maxLineLength=maxLineLength)
        #First make our own sorted version
        fileHandle = open(tempFile, 'r')
        l = fileHandle.readlines()
        l.sort()
        fileHandle.close()
        #Sort the file
        while True:
            command = "scriptTreeTest_Sort.py --jobTree %s --logLevel=DEBUG --fileToSort=%s --N %i --batchSystem %s --jobTime 1.0 --maxCpus 20 --retryCount 2" % (jobTreeDir, tempFile, N, batchSystem) #, retryCount)
            system(command)
            try:
                system("jobTreeStatus --jobTree %s --failIfNotComplete" % jobTreeDir)
                break
            except:
                print "The jobtree failed and will be restarted"
                #raise RuntimeError()
                continue
        #Now check the file is properly sorted: read back the sorted file
        fileHandle = open(tempFile, 'r')
        l2 = fileHandle.readlines()
        fileHandle.close()
        checkEqual(l, l2)
        system("rm -rf %s" % tempDir)