本文整理匯總了Python中pymodule.PassingData.perSampleMatchFractionReduceJob方法的典型用法代碼示例。如果您正苦於以下問題:Python PassingData.perSampleMatchFractionReduceJob方法的具體用法?Python PassingData.perSampleMatchFractionReduceJob怎麼用?Python PassingData.perSampleMatchFractionReduceJob使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pymodule.PassingData的用法示例。
在下文中一共展示了PassingData.perSampleMatchFractionReduceJob方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: addCheckingVCFOverlapSubWorkflow
# 需要導入模塊: from pymodule import PassingData [as 別名]
# 或者: from pymodule.PassingData import perSampleMatchFractionReduceJob [as 別名]
def addCheckingVCFOverlapSubWorkflow(self, workflow=None, chr2size=None, inputVCFData1=None, inputVCFData2=None, \
registerReferenceData=None, outputDirPrefix="", **keywords):
"""
2013.09.05
"""
if workflow is None:
workflow = self
if registerReferenceData is None:
registerReferenceData = self.registerReferenceData
sys.stderr.write("Adding Check-VCF overlap jobs between %s (patch 1) and %s (patch 2), job count=%s..."%
(len(inputVCFData1.jobDataLs), len(inputVCFData2.jobDataLs), self.no_of_jobs))
returnData = PassingData()
mapDirJob = self.addMkDirJob(outputDir="%sMap"%(outputDirPrefix))
reduceDirJob = self.addMkDirJob(outputDir="%sReduce"%(outputDirPrefix))
plotOutputDirJob = self.addMkDirJob(outputDir="%sPlot"%(outputDirPrefix))
overlapStatF = File(os.path.join(reduceDirJob.output, 'overlapSites.perChromosome.stat.tsv.gz'))
overlapSitesByChromosomeMergeJob=self.addStatMergeJob(statMergeProgram=workflow.mergeSameHeaderTablesIntoOne, \
outputF=overlapStatF, parentJobLs=[reduceDirJob], \
extraDependentInputLs=None, transferOutput=True, extraArguments=None)
overlapSitesMergeJob=self.addStatMergeJob(statMergeProgram=workflow.mergeSameHeaderTablesIntoOne, \
outputF=File(os.path.join(reduceDirJob.output, "overlapSites.tsv.gz")), parentJobLs=[reduceDirJob], \
extraDependentInputLs=None, transferOutput=True, extraArguments=None)
perSampleMatchFractionFile = File(os.path.join(reduceDirJob.output, 'perSampleMatchFraction.tsv.gz'))
perSampleMatchFractionReduceJob = self.addStatMergeJob(statMergeProgram=workflow.ReduceMatrixBySumSameKeyColsAndThenDivide, \
outputF=perSampleMatchFractionFile, parentJobLs=[reduceDirJob], extraDependentInputLs=[], transferOutput=True, \
extraArguments='-k 0 -v 1-2')
returnData.perSampleMatchFractionReduceJob = perSampleMatchFractionReduceJob
outputFile = File( os.path.join(plotOutputDirJob.output, 'perSampleMatchFraction_Hist.png'))
#no spaces or parenthesis or any other shell-vulnerable letters in the x or y axis labels (whichColumnPlotLabel, xColumnPlotLabel)
self.addDrawHistogramJob(workflow=workflow, executable=workflow.DrawHistogram, inputFileList=[perSampleMatchFractionFile], \
outputFile=outputFile, \
whichColumn=None, whichColumnHeader="no_of_matches_by_no_of_non_NA_pairs", whichColumnPlotLabel="matchFraction", \
logY=None, logCount=True, valueForNonPositiveYValue=50,\
minNoOfTotal=10,\
figureDPI=100, samplingRate=1,\
parentJobLs=[plotOutputDirJob, perSampleMatchFractionReduceJob ], \
extraDependentInputLs=None, \
extraArguments=None, transferOutput=True, job_max_memory=2000)
overlapStatSumF = File(os.path.join(reduceDirJob.output, 'overlapSites.wholeGenome.stat.tsv'))
overlapStatSumJob = self.addStatMergeJob(statMergeProgram=workflow.ReduceMatrixByChosenColumn, \
outputF=overlapStatSumF, parentJobLs=[reduceDirJob], extraDependentInputLs=[], transferOutput=True, \
extraArguments='-k 1000000 -v 1-25000') #The key column (-k 1000000) doesn't exist.
# essentially merging every rows into one
##25000 is a random big upper limit. 100 monkeys => 101*3 + 9 => 312 columns
#2012.8.17 the number of columns no longer expand as the number of samples because it's split into perSampleMatchFractionFile.
self.addInputToStatMergeJob(statMergeJob=overlapStatSumJob, inputF=overlapStatF, \
parentJobLs=[overlapSitesByChromosomeMergeJob])
vcfJobDataRBTree1 = self.constructGenomeFileRBTreeByFilenameInterval(jobDataStructure=inputVCFData1, chr2size=chr2size)
vcfJobDataRBTree2 = self.constructGenomeFileRBTreeByFilenameInterval(jobDataStructure=inputVCFData2, chr2size=chr2size)
noOfPairs=0
for vcfJobDataNode1 in vcfJobDataRBTree1:
chromosome = vcfJobDataNode1.key.chromosome
chrLength = chr2size.get(chromosome)
if chrLength is None:
sys.stderr.write("Warning: size for chromosome %s is unknown. set it to 1000.\n"%(chromosome))
chrLength = 1000
jobData1 = vcfJobDataNode1.value
vcfJobDataNodeListInTree2 = []
vcfJobDataRBTree2.findNodes(key=vcfJobDataNode1.key, node_ls=vcfJobDataNodeListInTree2)
for vcfJobDataNode2 in vcfJobDataNodeListInTree2:
noOfPairs += 1
jobData2 = vcfJobDataNode2.value
#narrow down either VCF file based on the interval info
overlap_start = max(vcfJobDataNode1.key.start, vcfJobDataNode2.key.start)
overlap_stop = min(vcfJobDataNode1.key.stop, vcfJobDataNode2.key.stop)
if overlap_start!=vcfJobDataNode1.key.start or overlap_stop!=vcfJobDataNode1.key.stop:
fileBasenamePrefix = "%s"%(utils.getFileBasenamePrefixFromPath(jobData1.file.name))
outputF = File(os.path.join(mapDirJob.output, "%s_%s_%s_%s.vcf"%(fileBasenamePrefix, chromosome, overlap_start, overlap_stop)))
selectVCF1Job = self.addSelectVariantsJob(SelectVariantsJava=self.SelectVariantsJava, \
inputF=jobData1.file, outputF=outputF, \
interval="%s:%s-%s"%(chromosome, overlap_start, overlap_stop),\
refFastaFList=registerReferenceData.refFastaFList, \
parentJobLs=[mapDirJob] + jobData1.jobLs, extraDependentInputLs=jobData1.fileLs[1:], transferOutput=False, \
extraArguments=None, extraArgumentList=None, job_max_memory=2000, walltime=None)
jobData1 = self.constructJobDataFromJob(selectVCF1Job)
if overlap_start!=vcfJobDataNode2.key.start or overlap_stop!=vcfJobDataNode2.key.stop:
fileBasenamePrefix = "%s"%(utils.getFileBasenamePrefixFromPath(jobData2.file.name))
outputF = File(os.path.join(mapDirJob.output, "%s_%s_%s_%s.vcf"%(fileBasenamePrefix, chromosome, overlap_start, overlap_stop)))
selectVCF2Job = self.addSelectVariantsJob(SelectVariantsJava=self.SelectVariantsJava, \
inputF=jobData2.file, outputF=outputF, \
interval="%s:%s-%s"%(chromosome, overlap_start, overlap_stop),\
refFastaFList=registerReferenceData.refFastaFList, \
parentJobLs=[mapDirJob] + jobData2.jobLs, extraDependentInputLs=jobData2.fileLs[1:], transferOutput=False, \
extraArguments=None, extraArgumentList=None, job_max_memory=2000, walltime=None)
jobData2 = self.constructJobDataFromJob(selectVCF2Job)
fileBasenamePrefix = "%s_vs_%s"%(utils.getFileBasenamePrefixFromPath(jobData1.file.name),
utils.getFileBasenamePrefixFromPath(jobData2.file.name))
#.........這裏部分代碼省略.........