當前位置: 首頁>>代碼示例>>Python>>正文


Python PassingData.perSampleMatchFractionReduceJob方法代碼示例

本文整理匯總了Python中pymodule.PassingData.perSampleMatchFractionReduceJob方法的典型用法代碼示例。如果您正苦於以下問題:Python PassingData.perSampleMatchFractionReduceJob方法的具體用法?Python PassingData.perSampleMatchFractionReduceJob怎麼用?Python PassingData.perSampleMatchFractionReduceJob使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pymodule.PassingData的用法示例。


在下文中一共展示了PassingData.perSampleMatchFractionReduceJob方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: addCheckingVCFOverlapSubWorkflow

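# Required import: from pymodule import PassingData [as alias]
# Note: perSampleMatchFractionReduceJob is not an importable name; in this example it is
# an attribute assigned on a PassingData instance (returnData.perSampleMatchFractionReduceJob).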
	def addCheckingVCFOverlapSubWorkflow(self, workflow=None, chr2size=None, inputVCFData1=None, inputVCFData2=None, \
					registerReferenceData=None, outputDirPrefix="", **keywords):
		"""
		2013.09.05
		"""
		if workflow is None:
			workflow = self
		if registerReferenceData is None:
			registerReferenceData = self.registerReferenceData
		
		sys.stderr.write("Adding Check-VCF overlap jobs between %s (patch 1) and %s (patch 2), job count=%s..."%
						(len(inputVCFData1.jobDataLs), len(inputVCFData2.jobDataLs), self.no_of_jobs))
		returnData = PassingData()
		
		mapDirJob = self.addMkDirJob(outputDir="%sMap"%(outputDirPrefix))
		reduceDirJob = self.addMkDirJob(outputDir="%sReduce"%(outputDirPrefix))
		plotOutputDirJob = self.addMkDirJob(outputDir="%sPlot"%(outputDirPrefix))
		
		overlapStatF = File(os.path.join(reduceDirJob.output, 'overlapSites.perChromosome.stat.tsv.gz'))
		overlapSitesByChromosomeMergeJob=self.addStatMergeJob(statMergeProgram=workflow.mergeSameHeaderTablesIntoOne, \
					outputF=overlapStatF, parentJobLs=[reduceDirJob], \
					extraDependentInputLs=None, transferOutput=True, extraArguments=None)
		
		overlapSitesMergeJob=self.addStatMergeJob(statMergeProgram=workflow.mergeSameHeaderTablesIntoOne, \
					outputF=File(os.path.join(reduceDirJob.output, "overlapSites.tsv.gz")), parentJobLs=[reduceDirJob], \
					extraDependentInputLs=None, transferOutput=True, extraArguments=None)
		
		perSampleMatchFractionFile = File(os.path.join(reduceDirJob.output, 'perSampleMatchFraction.tsv.gz'))
		perSampleMatchFractionReduceJob = self.addStatMergeJob(statMergeProgram=workflow.ReduceMatrixBySumSameKeyColsAndThenDivide, \
					outputF=perSampleMatchFractionFile, parentJobLs=[reduceDirJob], extraDependentInputLs=[], transferOutput=True, \
					extraArguments='-k 0 -v 1-2')
		returnData.perSampleMatchFractionReduceJob = perSampleMatchFractionReduceJob
		
		outputFile = File( os.path.join(plotOutputDirJob.output, 'perSampleMatchFraction_Hist.png'))
		#no spaces or parenthesis or any other shell-vulnerable letters in the x or y axis labels (whichColumnPlotLabel, xColumnPlotLabel)
		self.addDrawHistogramJob(workflow=workflow, executable=workflow.DrawHistogram, inputFileList=[perSampleMatchFractionFile], \
							outputFile=outputFile, \
					whichColumn=None, whichColumnHeader="no_of_matches_by_no_of_non_NA_pairs", whichColumnPlotLabel="matchFraction", \
					logY=None, logCount=True, valueForNonPositiveYValue=50,\
					minNoOfTotal=10,\
					figureDPI=100, samplingRate=1,\
					parentJobLs=[plotOutputDirJob, perSampleMatchFractionReduceJob ], \
					extraDependentInputLs=None, \
					extraArguments=None, transferOutput=True,  job_max_memory=2000)
		
		overlapStatSumF = File(os.path.join(reduceDirJob.output, 'overlapSites.wholeGenome.stat.tsv'))
		overlapStatSumJob = self.addStatMergeJob(statMergeProgram=workflow.ReduceMatrixByChosenColumn, \
						outputF=overlapStatSumF, parentJobLs=[reduceDirJob], extraDependentInputLs=[], transferOutput=True, \
						extraArguments='-k 1000000 -v 1-25000')	#The key column (-k 1000000) doesn't exist.
						# essentially merging every rows into one 
						##25000 is a random big upper limit. 100 monkeys => 101*3 + 9 => 312 columns
						#2012.8.17 the number of columns no longer expand as the number of samples because it's split into perSampleMatchFractionFile.
		self.addInputToStatMergeJob(statMergeJob=overlapStatSumJob, inputF=overlapStatF, \
							parentJobLs=[overlapSitesByChromosomeMergeJob])
		
		vcfJobDataRBTree1 = self.constructGenomeFileRBTreeByFilenameInterval(jobDataStructure=inputVCFData1, chr2size=chr2size)
		vcfJobDataRBTree2 = self.constructGenomeFileRBTreeByFilenameInterval(jobDataStructure=inputVCFData2, chr2size=chr2size)
		
		noOfPairs=0
		for vcfJobDataNode1 in vcfJobDataRBTree1:
			chromosome = vcfJobDataNode1.key.chromosome
			chrLength = chr2size.get(chromosome)
			if chrLength is None:
				sys.stderr.write("Warning: size for chromosome %s is unknown. set it to 1000.\n"%(chromosome))
				chrLength = 1000
			jobData1 = vcfJobDataNode1.value
			
			vcfJobDataNodeListInTree2 = []
			vcfJobDataRBTree2.findNodes(key=vcfJobDataNode1.key, node_ls=vcfJobDataNodeListInTree2)
			for vcfJobDataNode2 in vcfJobDataNodeListInTree2:
				noOfPairs += 1
				jobData2 = vcfJobDataNode2.value
				
				#narrow down either VCF file based on the interval info
				overlap_start = max(vcfJobDataNode1.key.start, vcfJobDataNode2.key.start)
				overlap_stop = min(vcfJobDataNode1.key.stop, vcfJobDataNode2.key.stop)
				if overlap_start!=vcfJobDataNode1.key.start or overlap_stop!=vcfJobDataNode1.key.stop:
					fileBasenamePrefix = "%s"%(utils.getFileBasenamePrefixFromPath(jobData1.file.name))
					outputF = File(os.path.join(mapDirJob.output, "%s_%s_%s_%s.vcf"%(fileBasenamePrefix, chromosome, overlap_start, overlap_stop)))
					selectVCF1Job = self.addSelectVariantsJob(SelectVariantsJava=self.SelectVariantsJava, \
										inputF=jobData1.file, outputF=outputF, \
										interval="%s:%s-%s"%(chromosome, overlap_start, overlap_stop),\
										refFastaFList=registerReferenceData.refFastaFList, \
										parentJobLs=[mapDirJob] + jobData1.jobLs, extraDependentInputLs=jobData1.fileLs[1:], transferOutput=False, \
										extraArguments=None, extraArgumentList=None, job_max_memory=2000, walltime=None)
					jobData1 = self.constructJobDataFromJob(selectVCF1Job)
				if overlap_start!=vcfJobDataNode2.key.start or overlap_stop!=vcfJobDataNode2.key.stop:
					fileBasenamePrefix = "%s"%(utils.getFileBasenamePrefixFromPath(jobData2.file.name))
					outputF = File(os.path.join(mapDirJob.output, "%s_%s_%s_%s.vcf"%(fileBasenamePrefix, chromosome, overlap_start, overlap_stop)))
					selectVCF2Job = self.addSelectVariantsJob(SelectVariantsJava=self.SelectVariantsJava, \
										inputF=jobData2.file, outputF=outputF, \
										interval="%s:%s-%s"%(chromosome, overlap_start, overlap_stop),\
										refFastaFList=registerReferenceData.refFastaFList, \
										parentJobLs=[mapDirJob] + jobData2.jobLs, extraDependentInputLs=jobData2.fileLs[1:], transferOutput=False, \
										extraArguments=None, extraArgumentList=None, job_max_memory=2000, walltime=None)
					jobData2 = self.constructJobDataFromJob(selectVCF2Job)
				
				fileBasenamePrefix = "%s_vs_%s"%(utils.getFileBasenamePrefixFromPath(jobData1.file.name), 
												utils.getFileBasenamePrefixFromPath(jobData2.file.name))
				
#.........這裏部分代碼省略.........
開發者ID:mjmontague,項目名稱:vervet-web,代碼行數:103,代碼來源:CheckTwoVCFOverlapPipeline.py


注:本文中的pymodule.PassingData.perSampleMatchFractionReduceJob方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。