当前位置: 首页>>代码示例>>Python>>正文


Python PassingData.beagleJob方法代码示例

本文整理汇总了Python中pymodule.PassingData.beagleJob方法的典型用法代码示例。如果您正苦于以下问题:Python PassingData.beagleJob方法的具体用法?Python PassingData.beagleJob怎么用?Python PassingData.beagleJob使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pymodule.PassingData的用法示例。


在下文中一共展示了PassingData.beagleJob方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: mapEachInterval

# 需要导入模块: from pymodule import PassingData [as 别名]
# 或者: from pymodule.PassingData import beagleJob [as 别名]

#.........这里部分代码省略.........
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=2000, maxJobPropertyValue=5000).value,\
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=600).value)
		
		# select the high-coverage members
		outputVCF = File(os.path.join(self.highCoveragePanelDirJob.output, \
									'%s.minCoverage%s.maxPairwiseKinship%s.refPanel.beagled.vcf'%\
									(intervalFileBasenamePrefix, self.minCoverageForRefPanel, self.maxPairwiseKinship)))
		#selectVariants would re-generate AC, AF so that TrioCaller could read it.
		#samtools uses 'AC1' instead of AC, 'AF1' instead of AF.
		selectDistantMembersVariantsJob = self.addSelectVariantsJob(SelectVariantsJava=self.SelectVariantsJava, \
				inputF=beagleOnHighCoverageJob.output, outputF=outputVCF, \
				refFastaFList=self.registerReferenceData.refFastaFList, \
				sampleIDKeepFile=self.selectDistantMembersFromGenotypeFileJob.output,\
				parentJobLs=[self.highCoveragePanelDirJob, beagleOnHighCoverageJob, self.selectDistantMembersFromGenotypeFileJob,\
							tabixOnHighCoverageVCFJob], \
				extraDependentInputLs=[tabixOnHighCoverageVCFJob.output], transferOutput=False, \
				extraArguments=None, \
				job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=7000).value,\
				walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value)
		
		
		##### Part 2 run Beagle on everyone with reference panel
		# run Beagle
		#refPanelFile=selectDistantMembersVariantsJob.output,\
		outputFnamePrefix = os.path.join(self.mapDirJob.folder, '%s.beagled'%(intervalFileBasenamePrefix))
		beagleJob = self.addBeagle4Job(executable=self.BeagleJava, \
						inputFile=VCFJobData.file, refPanelFile=None,\
						pedFile=self.outputPedigreeJob.output,\
						outputFnamePrefix=outputFnamePrefix, \
						burninIterations=7, phaseIterations=10, \
						noOfSamplingHaplotypesPerSample=4, duoscale=2, trioscale=2, \
						extraArguments=None, extraArgumentList=None,\
						parentJobLs=[self.mapDirJob, \
									self.outputPedigreeJob] + VCFJobData.jobLs, \
						transferOutput=False, no_of_cpus=None, \
						job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=13000).value,\
						walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
						)
		returnData.beagleJob = beagleJob
		
		#index .vcf.gz, output of beagle, without index, GATK can't work on gzipped vcf
		tabixIndexFile = File('%s.tbi'%(beagleJob.output.name))
		tabixJob = self.addGenericJob(executable=self.tabix, \
						inputFile=beagleJob.output, inputArgumentOption="",\
						outputFile=None, outputArgumentOption="-o", \
						extraDependentInputLs=None, \
						extraOutputLs=[tabixIndexFile], transferOutput=False, frontArgumentList=["-p vcf"], \
						extraArguments=None, \
						extraArgumentList=[], \
						parentJobLs=[beagleJob, self.mapDirJob],\
						no_of_cpus=None, \
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
开发者ID:mjmontague,项目名称:vervet-web,代码行数:70,代码来源:BeagleAndTrioCallerOnVCFWorkflow.py

示例2: addRefineGenotypeJobsViaBeagle

# 需要导入模块: from pymodule import PassingData [as 别名]
# 或者: from pymodule.PassingData import beagleJob [as 别名]
	def addRefineGenotypeJobsViaBeagle(self, inputFile=None, vcfBaseFname=None, outputDirJob=None, statDirJob=None, \
					refFastaFList=None, intervalData=None,\
					baseInputVolume=450*2000000, realInputVolume=None,\
					parentJobLs=None, \
					transferOutput=False, \
					no_of_cpus=None, job_max_memory=2000, walltime=180, \
					max_walltime=None, **keywords):
		
		returnData = PassingData()
		
		if not hasattr(self, "outputPedigreeJob"):
			#output pedigree, with no replicating certain individuals, no trio/duo splitting
			#plink format
			#for Beagle to read in
			pedigreeFileFormat = 4
			inputFileBasenamePrefix = utils.getFileBasenamePrefixFromPath(inputFile.name)
			pedFile = File(os.path.join(outputDirJob.output, 'pedigree.%s.format%s.txt'%\
									(inputFileBasenamePrefix, pedigreeFileFormat)))
			#sampleID2FamilyCountF = File(os.path.join(self.auxDirJob.output, 'pedigree.sampleID2FamilyCount.%s.format%s.txt'%\
			#						(inputFileBasenamePrefix, pedigreeFileFormat)))
			self.outputPedigreeJob = self.addOutputVRCPedigreeInTFAMGivenOrderFromFileJob(executable=self.OutputVRCPedigreeInTFAMGivenOrderFromFile, \
					inputFile=inputFile, outputFile=pedFile, \
					sampleID2FamilyCountF=None,\
					polymuttDatFile = None,\
					outputFileFormat=pedigreeFileFormat, \
					replicateIndividualTag=self.replicateIndividualTag,\
					treatEveryOneIndependent=self.treatEveryOneIndependent,\
					parentJobLs=parentJobLs + [outputDirJob], \
					extraDependentInputLs=None, transferOutput=True, \
					extraArguments=None, job_max_memory=2000, sshDBTunnel=self.needSSHDBTunnel)
			
		##### Part 2 run Beagle on everyone with reference panel
		# run Beagle
		#refPanelFile=selectDistantMembersVariantsJob.output,\
		outputFnamePrefix = os.path.join(outputDirJob.folder, '%s.beagled'%(vcfBaseFname))
		beagleJob = self.addBeagle4Job(executable=self.BeagleJava, \
						inputFile=inputFile, refPanelFile=None,\
						pedFile=self.outputPedigreeJob.output,\
						outputFnamePrefix=outputFnamePrefix, \
						burninIterations=7, phaseIterations=10, \
						noOfSamplingHaplotypesPerSample=4, duoscale=2, trioscale=2, \
						extraArguments=None, extraArgumentList=None,\
						parentJobLs=[outputDirJob, \
									self.outputPedigreeJob] + parentJobLs, \
						transferOutput=False, no_of_cpus=None, \
						job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=13000).value,\
						walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
						)
		returnData.beagleJob = beagleJob
		
		#index .vcf.gz, output of beagle, without index, GATK can't work on gzipped vcf
		tabixIndexFile = File('%s.tbi'%(beagleJob.output.name))
		tabixJob = self.addGenericJob(executable=self.tabix, \
						inputFile=beagleJob.output, inputArgumentOption="",\
						outputFile=None, outputArgumentOption="-o", \
						extraDependentInputLs=None, \
						extraOutputLs=[beagleJob.output, tabixIndexFile], transferOutput=False, \
						frontArgumentList=["-p vcf"], \
						extraArguments=None, \
						extraArgumentList=None, \
						parentJobLs=[beagleJob, outputDirJob],\
						no_of_cpus=None, \
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=180).value)
		
		#borrow PL to from pre-Beagle VCF to genotype 
		outputFile = File(os.path.join(outputDirJob.folder, '%s.beagled.withPL.vcf'%(vcfBaseFname)))
		combineBeagleAndPreBeagleVariantsJob = self.addGATKJob(executable=self.CombineBeagleAndPreBeagleVariantsJava, \
					GenomeAnalysisTKJar=self.GenomeAnalysisTKJar, \
					GATKAnalysisType="CombineBeagleAndPreBeagleVariants",\
					inputFile=None, inputArgumentOption=None, \
					refFastaFList=refFastaFList, \
					inputFileList=None, argumentForEachFileInInputFileList="--variant",\
					interval=None, outputFile=outputFile, outputArgumentOption="--out", \
					frontArgumentList=None, extraArguments=None, \
					extraArgumentList=["--variant:first", beagleJob.output, "--variant:second", inputFile, \
								"-genotypeMergeOptions PRIORITIZE", "-priority first,second"], \
					extraOutputLs=None, \
					extraDependentInputLs=[inputFile] + tabixJob.outputLs, \
					parentJobLs=[beagleJob, tabixJob]+ parentJobLs, transferOutput=False, \
					no_of_cpus=None, \
					key2ObjectForJob=None,\
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=600).value)
		#do not use "--variant:beagle" to name your vcf file as GATK would think it's in Beagle format
		returnData.refineGenotypeJob = combineBeagleAndPreBeagleVariantsJob	#the final gentoype job
		returnData.refineGenotypeJob.intervalData = intervalData	#attached so that it could be used by downstream jobs
		return returnData
开发者ID:mjmontague,项目名称:vervet-web,代码行数:102,代码来源:AlignmentToTrioCallPipeline.py


注:本文中的pymodule.PassingData.beagleJob方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。