当前位置: 首页>>代码示例>>Python>>正文


Python PassingData.refineGenotypeJob方法代码示例

本文整理汇总了Python中pymodule.PassingData.refineGenotypeJob方法的典型用法代码示例。如果您正苦于以下问题:Python PassingData.refineGenotypeJob方法的具体用法?Python PassingData.refineGenotypeJob怎么用?Python PassingData.refineGenotypeJob使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pymodule.PassingData的用法示例。


在下文中一共展示了PassingData.refineGenotypeJob方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: addRefineGenotypeJobsViaTrioCaller

# 需要导入模块: from pymodule import PassingData [as 别名]
# 或者: from pymodule.PassingData import refineGenotypeJob [as 别名]

#.........这里部分代码省略.........
					transferOutput=False, \
					extraArguments=None, \
					job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=9000).value, \
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
					)
		
		trioCallerOutputFile = File(os.path.join(outputDirJob.folder, \
												'%s.trioCaller.vcf'%(vcfBaseFname)))
		trioCallerJob = self.addTrioCallerJob(trioCallerWrapper=self.trioCallerWrapper, \
				trioCallerPath=self.trioCallerPath, \
				inputVCF=replicateVCFGenotypeColumnsJob.output,\
				pedFile=self.outputPedigreeJob.output, outputVCF=trioCallerOutputFile, \
				inputPhased=False,\
				parentJobLs=[outputDirJob, replicateVCFGenotypeColumnsJob, self.outputPedigreeJob], \
				extraDependentInputLs=[], transferOutput=False, \
				extraArguments=None, \
				job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=9000).value,\
				walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value)	#1.2G memory for 12K loci
		
		returnData.trioCallerJob = trioCallerJob
		
		"""
		2013.07.10 the TrioCaller VCF has some info tags that are not described in VCF header
		"""
		outputFile = File(os.path.join(outputDirJob.folder, \
												'%s.extraInfoDesc.vcf'%(vcfBaseFname)))
		addInfoDescJob = self.addGenericJob(executable=self.AddMissingInfoDescriptionToVCFHeader, \
					inputFile=trioCallerJob.output, \
					inputArgumentOption="-i", \
					outputFile=outputFile, outputArgumentOption="-o", \
					parentJobLs=[outputDirJob, trioCallerJob], \
					extraDependentInputLs=None, extraOutputLs=None, \
					frontArgumentList=None, extraArguments=None, extraArgumentList=None, \
					transferOutput=False, sshDBTunnel=None, \
					key2ObjectForJob=None, objectWithDBArguments=None, \
					no_of_cpus=None, 
					job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=2000, \
							minJobPropertyValue=1000, maxJobPropertyValue=3000).value, \
					walltime=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=500).value,\
					max_walltime=None)
		
		if statDirJob:
			# a CheckGenotypeConcordanceAmongReplicates.py job
			trioCallerReplicateConcordanceFile = File(os.path.join(self.statDirJob.folder, \
									'%s.trioCaller.concordance.tsv'%(vcfBaseFname)))
			returnData.trioCallerReplicateConcordanceJob = self.addGATKJob(executable=self.CalculateConcordanceJava, \
						GenomeAnalysisTKJar=self.GenomeAnalysisTKJar, \
						GATKAnalysisType="CalculateConcordanceAmongReplicates",\
						inputFile=trioCallerJob.output, inputArgumentOption="--variant", \
						refFastaFList=self.registerReferenceData.refFastaFList, \
						interval=None, \
						outputFile=trioCallerReplicateConcordanceFile, outputArgumentOption="--concordanceStatFname",\
						frontArgumentList=None, extraArguments="--replicateIndividualTag %s"%(self.replicateIndividualTag), \
						extraArgumentList=None, extraOutputLs=None, \
						parentJobLs=[self.statDirJob, trioCallerJob], \
						transferOutput=False, \
						no_of_cpus=None, \
						job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
								baseInputVolume=baseInputVolume, baseJobPropertyValue=6000, \
								minJobPropertyValue=9000, maxJobPropertyValue=16000).value, \
						walltime=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
								baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
								minJobPropertyValue=60, maxJobPropertyValue=1200).value)
			
		
		#2013.06.14
		#merge replicates to generate consensus call
		# (not haplotype-based, as different recombination points across replicate haplotypes make it non-trivial )
		mergeReplicateOutputF = File(os.path.join(outputDirJob.folder, \
									'%s.replicatesMerged.vcf'%(vcfBaseFname)))
		mergeVCFReplicateColumnsJob = self.addMergeVCFReplicateGenotypeColumnsJob(\
							executable=self.MergeVCFReplicateHaplotypesJava,\
							GenomeAnalysisTKJar=self.GenomeAnalysisTKJar, \
							inputF=addInfoDescJob.output, outputF=mergeReplicateOutputF, \
							replicateIndividualTag=self.replicateIndividualTag, \
							refFastaFList=self.registerReferenceData.refFastaFList, \
							parentJobLs=[outputDirJob, addInfoDescJob], \
							extraDependentInputLs=[], transferOutput=False, \
							extraArguments=None, \
							analysis_type='MergeVCFReplicateGenotypeColumns',\
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=5000, maxJobPropertyValue=9000).value,\
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value)
		returnData.refineGenotypeJob = mergeVCFReplicateColumnsJob	#the final gentoype job
		returnData.refineGenotypeJob.intervalData = intervalData	#attached so that it could be used by downstream jobs
		return returnData
开发者ID:mjmontague,项目名称:vervet-web,代码行数:104,代码来源:AlignmentToTrioCallPipeline.py

示例2: mapEachInterval

# 需要导入模块: from pymodule import PassingData [as 别名]
# 或者: from pymodule.PassingData import refineGenotypeJob [as 别名]

#.........这里部分代码省略.........
					key2ObjectForJob=None,\
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=600).value)
		#do not use "--variant:beagle" to name your vcf file as GATK would think it's in Beagle format
		
		#TrioCaller
		# 2013.06.11 replicate individuals who appear in more than 1 families
		round1_IndividualsReplicatedVCF = File( os.path.join(self.mapDirJob.folder, \
											'%s.replicate.vcf'%(intervalFileBasenamePrefix)))
		replicateVCFGenotypeColumnsJob = self.addReplicateVCFGenotypeColumnsJob(\
					executable=self.ReplicateVCFGenotypeColumns, \
					inputF=combineBeagleAndPreBeagleVariantsJob.output, \
					sampleID2FamilyCountF=self.outputReplicatePedigreeJob.sampleID2FamilyCountF, \
					outputF=round1_IndividualsReplicatedVCF, \
					replicateIndividualTag=self.replicateIndividualTag,\
					parentJobLs=[self.outputReplicatePedigreeJob, self.mapDirJob, combineBeagleAndPreBeagleVariantsJob], \
					extraDependentInputLs=None, \
					transferOutput=False, \
					extraArguments=None, \
					job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=9000).value, \
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
					)
		
		refineGenotypeOutputF = File(os.path.join(self.mapDirJob.folder, \
												'%s.trioCaller.vcf'%(intervalFileBasenamePrefix)))
		refineGenotypeJob = self.addTrioCallerJob(trioCallerWrapper=self.trioCallerWrapper, \
				trioCallerPath=self.trioCallerPath, \
				inputVCF=replicateVCFGenotypeColumnsJob.output,\
				pedFile=self.outputReplicatePedigreeJob.output, outputVCF=refineGenotypeOutputF, \
				inputPhased=True,\
				parentJobLs=[self.mapDirJob, replicateVCFGenotypeColumnsJob, self.outputReplicatePedigreeJob], \
				extraDependentInputLs=[], transferOutput=False, \
				extraArguments=None, \
				job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=9000).value,\
				walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value)	#1.2G memory for 12K loci
		
		returnData.refineGenotypeJob = refineGenotypeJob
		
		"""
		2013.07.10 the TrioCaller VCF has some info tags that are not described in VCF header
		"""
		outputFile = File(os.path.join(self.mapDirJob.folder, \
												'%s.extraInfoDesc.vcf'%(intervalFileBasenamePrefix)))
		addInfoDescJob = self.addGenericJob(executable=self.AddMissingInfoDescriptionToVCFHeader, \
					inputFile=refineGenotypeJob.output, \
					inputArgumentOption="-i", \
					outputFile=outputFile, outputArgumentOption="-o", \
					parentJobLs=[self.mapDirJob, refineGenotypeJob], \
					extraDependentInputLs=None, extraOutputLs=None, \
					frontArgumentList=None, extraArguments=None, extraArgumentList=None, \
					transferOutput=False, sshDBTunnel=None, \
					key2ObjectForJob=None, objectWithDBArguments=None, \
					no_of_cpus=None, 
					job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
开发者ID:mjmontague,项目名称:vervet-web,代码行数:70,代码来源:BeagleAndTrioCallerOnVCFWorkflow.py

示例3: addRefineGenotypeJobsViaBeagle

# 需要导入模块: from pymodule import PassingData [as 别名]
# 或者: from pymodule.PassingData import refineGenotypeJob [as 别名]
	def addRefineGenotypeJobsViaBeagle(self, inputFile=None, vcfBaseFname=None, outputDirJob=None, statDirJob=None, \
					refFastaFList=None, intervalData=None,\
					baseInputVolume=450*2000000, realInputVolume=None,\
					parentJobLs=None, \
					transferOutput=False, \
					no_of_cpus=None, job_max_memory=2000, walltime=180, \
					max_walltime=None, **keywords):
		
		returnData = PassingData()
		
		if not hasattr(self, "outputPedigreeJob"):
			#output pedigree, with no replicating certain individuals, no trio/duo splitting
			#plink format
			#for Beagle to read in
			pedigreeFileFormat = 4
			inputFileBasenamePrefix = utils.getFileBasenamePrefixFromPath(inputFile.name)
			pedFile = File(os.path.join(outputDirJob.output, 'pedigree.%s.format%s.txt'%\
									(inputFileBasenamePrefix, pedigreeFileFormat)))
			#sampleID2FamilyCountF = File(os.path.join(self.auxDirJob.output, 'pedigree.sampleID2FamilyCount.%s.format%s.txt'%\
			#						(inputFileBasenamePrefix, pedigreeFileFormat)))
			self.outputPedigreeJob = self.addOutputVRCPedigreeInTFAMGivenOrderFromFileJob(executable=self.OutputVRCPedigreeInTFAMGivenOrderFromFile, \
					inputFile=inputFile, outputFile=pedFile, \
					sampleID2FamilyCountF=None,\
					polymuttDatFile = None,\
					outputFileFormat=pedigreeFileFormat, \
					replicateIndividualTag=self.replicateIndividualTag,\
					treatEveryOneIndependent=self.treatEveryOneIndependent,\
					parentJobLs=parentJobLs + [outputDirJob], \
					extraDependentInputLs=None, transferOutput=True, \
					extraArguments=None, job_max_memory=2000, sshDBTunnel=self.needSSHDBTunnel)
			
		##### Part 2 run Beagle on everyone with reference panel
		# run Beagle
		#refPanelFile=selectDistantMembersVariantsJob.output,\
		outputFnamePrefix = os.path.join(outputDirJob.folder, '%s.beagled'%(vcfBaseFname))
		beagleJob = self.addBeagle4Job(executable=self.BeagleJava, \
						inputFile=inputFile, refPanelFile=None,\
						pedFile=self.outputPedigreeJob.output,\
						outputFnamePrefix=outputFnamePrefix, \
						burninIterations=7, phaseIterations=10, \
						noOfSamplingHaplotypesPerSample=4, duoscale=2, trioscale=2, \
						extraArguments=None, extraArgumentList=None,\
						parentJobLs=[outputDirJob, \
									self.outputPedigreeJob] + parentJobLs, \
						transferOutput=False, no_of_cpus=None, \
						job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=4000, maxJobPropertyValue=13000).value,\
						walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
						)
		returnData.beagleJob = beagleJob
		
		#index .vcf.gz, output of beagle, without index, GATK can't work on gzipped vcf
		tabixIndexFile = File('%s.tbi'%(beagleJob.output.name))
		tabixJob = self.addGenericJob(executable=self.tabix, \
						inputFile=beagleJob.output, inputArgumentOption="",\
						outputFile=None, outputArgumentOption="-o", \
						extraDependentInputLs=None, \
						extraOutputLs=[beagleJob.output, tabixIndexFile], transferOutput=False, \
						frontArgumentList=["-p vcf"], \
						extraArguments=None, \
						extraArgumentList=None, \
						parentJobLs=[beagleJob, outputDirJob],\
						no_of_cpus=None, \
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=180).value)
		
		#borrow PL to from pre-Beagle VCF to genotype 
		outputFile = File(os.path.join(outputDirJob.folder, '%s.beagled.withPL.vcf'%(vcfBaseFname)))
		combineBeagleAndPreBeagleVariantsJob = self.addGATKJob(executable=self.CombineBeagleAndPreBeagleVariantsJava, \
					GenomeAnalysisTKJar=self.GenomeAnalysisTKJar, \
					GATKAnalysisType="CombineBeagleAndPreBeagleVariants",\
					inputFile=None, inputArgumentOption=None, \
					refFastaFList=refFastaFList, \
					inputFileList=None, argumentForEachFileInInputFileList="--variant",\
					interval=None, outputFile=outputFile, outputArgumentOption="--out", \
					frontArgumentList=None, extraArguments=None, \
					extraArgumentList=["--variant:first", beagleJob.output, "--variant:second", inputFile, \
								"-genotypeMergeOptions PRIORITIZE", "-priority first,second"], \
					extraOutputLs=None, \
					extraDependentInputLs=[inputFile] + tabixJob.outputLs, \
					parentJobLs=[beagleJob, tabixJob]+ parentJobLs, transferOutput=False, \
					no_of_cpus=None, \
					key2ObjectForJob=None,\
					job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
							minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
					walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
							baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
							minJobPropertyValue=60, maxJobPropertyValue=600).value)
		#do not use "--variant:beagle" to name your vcf file as GATK would think it's in Beagle format
		returnData.refineGenotypeJob = combineBeagleAndPreBeagleVariantsJob	#the final gentoype job
		returnData.refineGenotypeJob.intervalData = intervalData	#attached so that it could be used by downstream jobs
		return returnData
开发者ID:mjmontague,项目名称:vervet-web,代码行数:102,代码来源:AlignmentToTrioCallPipeline.py


注:本文中的pymodule.PassingData.refineGenotypeJob方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。