This article collects typical usage examples of the Python method pymodule.PassingData.beagleJob. If you are unsure what exactly PassingData.beagleJob does or how to use it, the curated code examples below may help. You can also explore further usage examples of the containing class, pymodule.PassingData.
The following shows 2 code examples of the PassingData.beagleJob method, sorted by popularity by default.
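Note that in both examples below, beagleJob is not a predefined method on PassingData: it is an attribute that the workflow code assigns to a PassingData instance (returnData.beagleJob = beagleJob) so that downstream code can fetch the job back by name. Below is a minimal sketch of that pattern, assuming only that PassingData stores keyword arguments and ad-hoc attributes; the toy job class is hypothetical.

from pymodule import PassingData

class _ToyJob(object):
    """Hypothetical stand-in for a workflow job object."""
    def __init__(self, name):
        self.name = name
        self.output = '%s.vcf.gz' % name

returnData = PassingData()    #plain attribute container
returnData.beagleJob = _ToyJob('beagled')    #attach the Beagle phasing job, as both examples do
returnData.refineGenotypeJob = _ToyJob('beagled.withPL')    #the final genotype job (Example 2)
print(returnData.beagleJob.output)    #downstream code reads the attributes back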
Example 1: mapEachInterval
# Required import: from pymodule import PassingData [as alias]
# Or: from pymodule.PassingData import beagleJob [as alias]
#......... part of the code is omitted here .........
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=2000, maxJobPropertyValue=5000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=600).value)
# select the high-coverage members
outputVCF = File(os.path.join(self.highCoveragePanelDirJob.output, \
'%s.minCoverage%s.maxPairwiseKinship%s.refPanel.beagled.vcf'%\
(intervalFileBasenamePrefix, self.minCoverageForRefPanel, self.maxPairwiseKinship)))
#SelectVariants regenerates AC and AF so that TrioCaller can read them.
#(samtools uses 'AC1' instead of 'AC' and 'AF1' instead of 'AF'.)
selectDistantMembersVariantsJob = self.addSelectVariantsJob(SelectVariantsJava=self.SelectVariantsJava, \
inputF=beagleOnHighCoverageJob.output, outputF=outputVCF, \
refFastaFList=self.registerReferenceData.refFastaFList, \
sampleIDKeepFile=self.selectDistantMembersFromGenotypeFileJob.output,\
parentJobLs=[self.highCoveragePanelDirJob, beagleOnHighCoverageJob, self.selectDistantMembersFromGenotypeFileJob,\
tabixOnHighCoverageVCFJob], \
extraDependentInputLs=[tabixOnHighCoverageVCFJob.output], transferOutput=False, \
extraArguments=None, \
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=4000, maxJobPropertyValue=7000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value)
##### Part 2 run Beagle on everyone with reference panel
# run Beagle
#refPanelFile=selectDistantMembersVariantsJob.output,\
outputFnamePrefix = os.path.join(self.mapDirJob.folder, '%s.beagled'%(intervalFileBasenamePrefix))
beagleJob = self.addBeagle4Job(executable=self.BeagleJava, \
inputFile=VCFJobData.file, refPanelFile=None,\
pedFile=self.outputPedigreeJob.output,\
outputFnamePrefix=outputFnamePrefix, \
burninIterations=7, phaseIterations=10, \
noOfSamplingHaplotypesPerSample=4, duoscale=2, trioscale=2, \
extraArguments=None, extraArgumentList=None,\
parentJobLs=[self.mapDirJob, \
self.outputPedigreeJob] + VCFJobData.jobLs, \
transferOutput=False, no_of_cpus=None, \
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=4000, maxJobPropertyValue=13000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
)
returnData.beagleJob = beagleJob
#index the .vcf.gz output of Beagle; without an index, GATK can't work on a gzipped VCF
tabixIndexFile = File('%s.tbi'%(beagleJob.output.name))
tabixJob = self.addGenericJob(executable=self.tabix, \
inputFile=beagleJob.output, inputArgumentOption="",\
outputFile=None, outputArgumentOption="-o", \
extraDependentInputLs=None, \
extraOutputLs=[tabixIndexFile], transferOutput=False, frontArgumentList=["-p vcf"], \
extraArguments=None, \
extraArgumentList=[], \
parentJobLs=[beagleJob, self.mapDirJob],\
no_of_cpus=None, \
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
#......... the rest of this example is omitted here .........
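This example (and Example 2 below) wraps the indexing step in addGenericJob with frontArgumentList=["-p vcf"]; stripped of the workflow plumbing, the job amounts to a single tabix invocation on the bgzipped Beagle output. A minimal sketch of the equivalent direct call, assuming tabix is on the PATH and using a hypothetical file name:

import subprocess

#index the bgzipped Beagle output so GATK can read it; "-p vcf" matches frontArgumentList above
#produces contig1.beagled.vcf.gz.tbi next to the input (file name is hypothetical)
subprocess.check_call(["tabix", "-p", "vcf", "contig1.beagled.vcf.gz"])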
Example 2: addRefineGenotypeJobsViaBeagle
# Required import: from pymodule import PassingData [as alias]
# Or: from pymodule.PassingData import beagleJob [as alias]
def addRefineGenotypeJobsViaBeagle(self, inputFile=None, vcfBaseFname=None, outputDirJob=None, statDirJob=None, \
refFastaFList=None, intervalData=None,\
baseInputVolume=450*2000000, realInputVolume=None,\
parentJobLs=None, \
transferOutput=False, \
no_of_cpus=None, job_max_memory=2000, walltime=180, \
max_walltime=None, **keywords):
returnData = PassingData()
if not hasattr(self, "outputPedigreeJob"):
#output the pedigree without replicating any individuals and without trio/duo splitting,
#in PLINK format,
#for Beagle to read in
pedigreeFileFormat = 4
inputFileBasenamePrefix = utils.getFileBasenamePrefixFromPath(inputFile.name)
pedFile = File(os.path.join(outputDirJob.output, 'pedigree.%s.format%s.txt'%\
(inputFileBasenamePrefix, pedigreeFileFormat)))
#sampleID2FamilyCountF = File(os.path.join(self.auxDirJob.output, 'pedigree.sampleID2FamilyCount.%s.format%s.txt'%\
# (inputFileBasenamePrefix, pedigreeFileFormat)))
self.outputPedigreeJob = self.addOutputVRCPedigreeInTFAMGivenOrderFromFileJob(executable=self.OutputVRCPedigreeInTFAMGivenOrderFromFile, \
inputFile=inputFile, outputFile=pedFile, \
sampleID2FamilyCountF=None,\
polymuttDatFile = None,\
outputFileFormat=pedigreeFileFormat, \
replicateIndividualTag=self.replicateIndividualTag,\
treatEveryOneIndependent=self.treatEveryOneIndependent,\
parentJobLs=parentJobLs + [outputDirJob], \
extraDependentInputLs=None, transferOutput=True, \
extraArguments=None, job_max_memory=2000, sshDBTunnel=self.needSSHDBTunnel)
##### Part 2 run Beagle on everyone with reference panel
# run Beagle
#refPanelFile=selectDistantMembersVariantsJob.output,\
outputFnamePrefix = os.path.join(outputDirJob.folder, '%s.beagled'%(vcfBaseFname))
beagleJob = self.addBeagle4Job(executable=self.BeagleJava, \
inputFile=inputFile, refPanelFile=None,\
pedFile=self.outputPedigreeJob.output,\
outputFnamePrefix=outputFnamePrefix, \
burninIterations=7, phaseIterations=10, \
noOfSamplingHaplotypesPerSample=4, duoscale=2, trioscale=2, \
extraArguments=None, extraArgumentList=None,\
parentJobLs=[outputDirJob, \
self.outputPedigreeJob] + parentJobLs, \
transferOutput=False, no_of_cpus=None, \
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=4000, maxJobPropertyValue=13000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
)
returnData.beagleJob = beagleJob
#index the .vcf.gz output of Beagle; without an index, GATK can't work on a gzipped VCF
tabixIndexFile = File('%s.tbi'%(beagleJob.output.name))
tabixJob = self.addGenericJob(executable=self.tabix, \
inputFile=beagleJob.output, inputArgumentOption="",\
outputFile=None, outputArgumentOption="-o", \
extraDependentInputLs=None, \
extraOutputLs=[beagleJob.output, tabixIndexFile], transferOutput=False, \
frontArgumentList=["-p vcf"], \
extraArguments=None, \
extraArgumentList=None, \
parentJobLs=[beagleJob, outputDirJob],\
no_of_cpus=None, \
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=180).value)
#borrow the PL field from the pre-Beagle VCF into the final genotypes
outputFile = File(os.path.join(outputDirJob.folder, '%s.beagled.withPL.vcf'%(vcfBaseFname)))
combineBeagleAndPreBeagleVariantsJob = self.addGATKJob(executable=self.CombineBeagleAndPreBeagleVariantsJava, \
GenomeAnalysisTKJar=self.GenomeAnalysisTKJar, \
GATKAnalysisType="CombineBeagleAndPreBeagleVariants",\
inputFile=None, inputArgumentOption=None, \
refFastaFList=refFastaFList, \
inputFileList=None, argumentForEachFileInInputFileList="--variant",\
interval=None, outputFile=outputFile, outputArgumentOption="--out", \
frontArgumentList=None, extraArguments=None, \
extraArgumentList=["--variant:first", beagleJob.output, "--variant:second", inputFile, \
"-genotypeMergeOptions PRIORITIZE", "-priority first,second"], \
extraOutputLs=None, \
extraDependentInputLs=[inputFile] + tabixJob.outputLs, \
parentJobLs=[beagleJob, tabixJob]+ parentJobLs, transferOutput=False, \
no_of_cpus=None, \
key2ObjectForJob=None,\
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=600).value)
#do not use "--variant:beagle" to tag your VCF file, as GATK would then think it's in Beagle format
returnData.refineGenotypeJob = combineBeagleAndPreBeagleVariantsJob #the final genotype job
returnData.refineGenotypeJob.intervalData = intervalData #attached so that it could be used by downstream jobs
return returnData
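For context, here is a minimal sketch of how a caller might consume the PassingData returned by addRefineGenotypeJobsViaBeagle. The helper function and the nextStepFunc hook are hypothetical; the attribute names (refineGenotypeJob, beagleJob, intervalData) are exactly the ones set above, and the .output attribute on the final GATK job is assumed to follow the same convention as beagleJob.output.

def wireRefinedGenotypesIntoNextStep(returnData, nextStepFunc):
    """
    Hypothetical downstream consumer: returnData is the PassingData built above,
    nextStepFunc stands in for whatever add*Job method comes next in the workflow.
    """
    finalJob = returnData.refineGenotypeJob    #CombineBeagleAndPreBeagleVariants job, the final genotype job
    phasingJob = returnData.beagleJob    #the raw Beagle 4 phasing job, in case its output is also needed
    #intervalData was attached to the final job so downstream jobs know which interval this VCF covers
    return nextStepFunc(inputFile=finalJob.output, intervalData=finalJob.intervalData, parentJobLs=[finalJob, phasingJob])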