本文整理匯總了Python中pymodule.PassingData.refineGenotypeJob方法的典型用法代碼示例。如果您正苦於以下問題:Python PassingData.refineGenotypeJob方法的具體用法?Python PassingData.refineGenotypeJob怎麼用?Python PassingData.refineGenotypeJob使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pymodule.PassingData的用法示例。
在下文中一共展示了PassingData.refineGenotypeJob方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: addRefineGenotypeJobsViaTrioCaller
# 需要導入模塊: from pymodule import PassingData [as 別名]
# 或者: from pymodule.PassingData import refineGenotypeJob [as 別名]
#.........這裏部分代碼省略.........
transferOutput=False, \
extraArguments=None, \
job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=4000, maxJobPropertyValue=9000).value, \
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
)
trioCallerOutputFile = File(os.path.join(outputDirJob.folder, \
'%s.trioCaller.vcf'%(vcfBaseFname)))
trioCallerJob = self.addTrioCallerJob(trioCallerWrapper=self.trioCallerWrapper, \
trioCallerPath=self.trioCallerPath, \
inputVCF=replicateVCFGenotypeColumnsJob.output,\
pedFile=self.outputPedigreeJob.output, outputVCF=trioCallerOutputFile, \
inputPhased=False,\
parentJobLs=[outputDirJob, replicateVCFGenotypeColumnsJob, self.outputPedigreeJob], \
extraDependentInputLs=[], transferOutput=False, \
extraArguments=None, \
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=4000, maxJobPropertyValue=9000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value) #1.2G memory for 12K loci
returnData.trioCallerJob = trioCallerJob
"""
2013.07.10 the TrioCaller VCF has some info tags that are not described in VCF header
"""
outputFile = File(os.path.join(outputDirJob.folder, \
'%s.extraInfoDesc.vcf'%(vcfBaseFname)))
addInfoDescJob = self.addGenericJob(executable=self.AddMissingInfoDescriptionToVCFHeader, \
inputFile=trioCallerJob.output, \
inputArgumentOption="-i", \
outputFile=outputFile, outputArgumentOption="-o", \
parentJobLs=[outputDirJob, trioCallerJob], \
extraDependentInputLs=None, extraOutputLs=None, \
frontArgumentList=None, extraArguments=None, extraArgumentList=None, \
transferOutput=False, sshDBTunnel=None, \
key2ObjectForJob=None, objectWithDBArguments=None, \
no_of_cpus=None,
job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=2000, \
minJobPropertyValue=1000, maxJobPropertyValue=3000).value, \
walltime=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=500).value,\
max_walltime=None)
if statDirJob:
# a CheckGenotypeConcordanceAmongReplicates.py job
trioCallerReplicateConcordanceFile = File(os.path.join(self.statDirJob.folder, \
'%s.trioCaller.concordance.tsv'%(vcfBaseFname)))
returnData.trioCallerReplicateConcordanceJob = self.addGATKJob(executable=self.CalculateConcordanceJava, \
GenomeAnalysisTKJar=self.GenomeAnalysisTKJar, \
GATKAnalysisType="CalculateConcordanceAmongReplicates",\
inputFile=trioCallerJob.output, inputArgumentOption="--variant", \
refFastaFList=self.registerReferenceData.refFastaFList, \
interval=None, \
outputFile=trioCallerReplicateConcordanceFile, outputArgumentOption="--concordanceStatFname",\
frontArgumentList=None, extraArguments="--replicateIndividualTag %s"%(self.replicateIndividualTag), \
extraArgumentList=None, extraOutputLs=None, \
parentJobLs=[self.statDirJob, trioCallerJob], \
transferOutput=False, \
no_of_cpus=None, \
job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=6000, \
minJobPropertyValue=9000, maxJobPropertyValue=16000).value, \
walltime=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value)
#2013.06.14
#merge replicates to generate consensus call
# (not haplotype-based, as different recombination points across replicate haplotypes make it non-trivial )
mergeReplicateOutputF = File(os.path.join(outputDirJob.folder, \
'%s.replicatesMerged.vcf'%(vcfBaseFname)))
mergeVCFReplicateColumnsJob = self.addMergeVCFReplicateGenotypeColumnsJob(\
executable=self.MergeVCFReplicateHaplotypesJava,\
GenomeAnalysisTKJar=self.GenomeAnalysisTKJar, \
inputF=addInfoDescJob.output, outputF=mergeReplicateOutputF, \
replicateIndividualTag=self.replicateIndividualTag, \
refFastaFList=self.registerReferenceData.refFastaFList, \
parentJobLs=[outputDirJob, addInfoDescJob], \
extraDependentInputLs=[], transferOutput=False, \
extraArguments=None, \
analysis_type='MergeVCFReplicateGenotypeColumns',\
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=5000, maxJobPropertyValue=9000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value)
returnData.refineGenotypeJob = mergeVCFReplicateColumnsJob #the final gentoype job
returnData.refineGenotypeJob.intervalData = intervalData #attached so that it could be used by downstream jobs
return returnData
示例2: mapEachInterval
# 需要導入模塊: from pymodule import PassingData [as 別名]
# 或者: from pymodule.PassingData import refineGenotypeJob [as 別名]
#.........這裏部分代碼省略.........
key2ObjectForJob=None,\
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=2000, maxJobPropertyValue=4000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=600).value)
#do not use "--variant:beagle" to name your vcf file as GATK would think it's in Beagle format
#TrioCaller
# 2013.06.11 replicate individuals who appear in more than 1 families
round1_IndividualsReplicatedVCF = File( os.path.join(self.mapDirJob.folder, \
'%s.replicate.vcf'%(intervalFileBasenamePrefix)))
replicateVCFGenotypeColumnsJob = self.addReplicateVCFGenotypeColumnsJob(\
executable=self.ReplicateVCFGenotypeColumns, \
inputF=combineBeagleAndPreBeagleVariantsJob.output, \
sampleID2FamilyCountF=self.outputReplicatePedigreeJob.sampleID2FamilyCountF, \
outputF=round1_IndividualsReplicatedVCF, \
replicateIndividualTag=self.replicateIndividualTag,\
parentJobLs=[self.outputReplicatePedigreeJob, self.mapDirJob, combineBeagleAndPreBeagleVariantsJob], \
extraDependentInputLs=None, \
transferOutput=False, \
extraArguments=None, \
job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=4000, maxJobPropertyValue=9000).value, \
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value,\
)
refineGenotypeOutputF = File(os.path.join(self.mapDirJob.folder, \
'%s.trioCaller.vcf'%(intervalFileBasenamePrefix)))
refineGenotypeJob = self.addTrioCallerJob(trioCallerWrapper=self.trioCallerWrapper, \
trioCallerPath=self.trioCallerPath, \
inputVCF=replicateVCFGenotypeColumnsJob.output,\
pedFile=self.outputReplicatePedigreeJob.output, outputVCF=refineGenotypeOutputF, \
inputPhased=True,\
parentJobLs=[self.mapDirJob, replicateVCFGenotypeColumnsJob, self.outputReplicatePedigreeJob], \
extraDependentInputLs=[], transferOutput=False, \
extraArguments=None, \
job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
minJobPropertyValue=4000, maxJobPropertyValue=9000).value,\
walltime= self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
minJobPropertyValue=60, maxJobPropertyValue=1200).value) #1.2G memory for 12K loci
returnData.refineGenotypeJob = refineGenotypeJob
"""
2013.07.10 the TrioCaller VCF has some info tags that are not described in VCF header
"""
outputFile = File(os.path.join(self.mapDirJob.folder, \
'%s.extraInfoDesc.vcf'%(intervalFileBasenamePrefix)))
addInfoDescJob = self.addGenericJob(executable=self.AddMissingInfoDescriptionToVCFHeader, \
inputFile=refineGenotypeJob.output, \
inputArgumentOption="-i", \
outputFile=outputFile, outputArgumentOption="-o", \
parentJobLs=[self.mapDirJob, refineGenotypeJob], \
extraDependentInputLs=None, extraOutputLs=None, \
frontArgumentList=None, extraArguments=None, extraArgumentList=None, \
transferOutput=False, sshDBTunnel=None, \
key2ObjectForJob=None, objectWithDBArguments=None, \
no_of_cpus=None,
job_max_memory=self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
示例3: addRefineGenotypeJobsViaBeagle
# 需要導入模塊: from pymodule import PassingData [as 別名]
# 或者: from pymodule.PassingData import refineGenotypeJob [as 別名]
def addRefineGenotypeJobsViaBeagle(self, inputFile=None, vcfBaseFname=None, outputDirJob=None, statDirJob=None,
        refFastaFList=None, intervalData=None,
        baseInputVolume=450*2000000, realInputVolume=None,
        parentJobLs=None,
        transferOutput=False,
        no_of_cpus=None, job_max_memory=2000, walltime=180,
        max_walltime=None, **keywords):
    """
    Add the chain of jobs that refines genotypes with Beagle and return them.

    The chain is:
        1. (only if self.outputPedigreeJob does not exist yet) a job that
           writes the pedigree file handed to Beagle; the job is cached on
           self.outputPedigreeJob so later calls reuse it.
        2. a Beagle job run on inputFile with that pedigree.
        3. a tabix job that indexes the Beagle output.
        4. a GATK "CombineBeagleAndPreBeagleVariants" job that merges the
           Beagle output (priority "first") with inputFile ("second").

    Arguments:
        inputFile: the pre-Beagle VCF file object; its .name is read to
            build the pedigree file name.
        vcfBaseFname: prefix used to name the Beagle and merged outputs.
        outputDirJob: directory job; its .folder / .output are used as the
            output directory and it is a parent of the jobs added here.
        statDirJob: accepted for interface compatibility, not used here.
        refFastaFList: forwarded to the GATK job.
        intervalData: attached to the final job as .intervalData.
        baseInputVolume, realInputVolume: forwarded to
            self.scaleJobWalltimeOrMemoryBasedOnInput() for every job's
            memory and walltime.
        parentJobLs: list of extra parent jobs. None (the default) is
            treated as an empty list.
        transferOutput, no_of_cpus, job_max_memory, walltime, max_walltime,
        **keywords: accepted for interface compatibility; the jobs below
            use their own values.

    Returns:
        a PassingData object with two attributes:
            beagleJob: the Beagle job.
            refineGenotypeJob: the final (merged) genotype job.
    """
    # parentJobLs defaults to None but is concatenated with lists below;
    # without this guard the default call fails with a TypeError.
    if parentJobLs is None:
        parentJobLs = []

    def scaleJobProperty(baseValue, minValue, maxValue):
        # one place for the memory/walltime scaling shared by all jobs below
        return self.scaleJobWalltimeOrMemoryBasedOnInput(
            realInputVolume=realInputVolume,
            baseInputVolume=baseInputVolume,
            baseJobPropertyValue=baseValue,
            minJobPropertyValue=minValue,
            maxJobPropertyValue=maxValue).value

    returnData = PassingData()

    if not hasattr(self, "outputPedigreeJob"):
        # output pedigree, with no replicating certain individuals, no trio/duo splitting
        # plink format
        # for Beagle to read in
        pedigreeFileFormat = 4
        inputFileBasenamePrefix = utils.getFileBasenamePrefixFromPath(inputFile.name)
        # NOTE(review): this path uses outputDirJob.output while every other
        # path in this method uses outputDirJob.folder - confirm both are the
        # same directory.
        pedFile = File(os.path.join(outputDirJob.output,
            'pedigree.%s.format%s.txt' % (inputFileBasenamePrefix, pedigreeFileFormat)))
        self.outputPedigreeJob = self.addOutputVRCPedigreeInTFAMGivenOrderFromFileJob(
            executable=self.OutputVRCPedigreeInTFAMGivenOrderFromFile,
            inputFile=inputFile, outputFile=pedFile,
            sampleID2FamilyCountF=None,
            polymuttDatFile=None,
            outputFileFormat=pedigreeFileFormat,
            replicateIndividualTag=self.replicateIndividualTag,
            treatEveryOneIndependent=self.treatEveryOneIndependent,
            parentJobLs=parentJobLs + [outputDirJob],
            extraDependentInputLs=None, transferOutput=True,
            extraArguments=None, job_max_memory=2000,
            sshDBTunnel=self.needSSHDBTunnel)

    ##### Part 2 run Beagle on everyone (no reference panel file is passed)
    outputFnamePrefix = os.path.join(outputDirJob.folder, '%s.beagled' % (vcfBaseFname))
    beagleJob = self.addBeagle4Job(
        executable=self.BeagleJava,
        inputFile=inputFile, refPanelFile=None,
        pedFile=self.outputPedigreeJob.output,
        outputFnamePrefix=outputFnamePrefix,
        burninIterations=7, phaseIterations=10,
        noOfSamplingHaplotypesPerSample=4, duoscale=2, trioscale=2,
        extraArguments=None, extraArgumentList=None,
        parentJobLs=[outputDirJob, self.outputPedigreeJob] + parentJobLs,
        transferOutput=False, no_of_cpus=None,
        job_max_memory=scaleJobProperty(4000, 4000, 13000),
        walltime=scaleJobProperty(60, 60, 1200),
        )
    returnData.beagleJob = beagleJob

    # index .vcf.gz, output of beagle, without index, GATK can't work on gzipped vcf
    tabixIndexFile = File('%s.tbi' % (beagleJob.output.name))
    tabixJob = self.addGenericJob(
        executable=self.tabix,
        inputFile=beagleJob.output, inputArgumentOption="",
        outputFile=None, outputArgumentOption="-o",
        extraDependentInputLs=None,
        extraOutputLs=[beagleJob.output, tabixIndexFile], transferOutput=False,
        frontArgumentList=["-p vcf"],
        extraArguments=None,
        extraArgumentList=None,
        parentJobLs=[beagleJob, outputDirJob],
        no_of_cpus=None,
        job_max_memory=scaleJobProperty(4000, 2000, 4000),
        walltime=scaleJobProperty(60, 60, 180))

    # borrow PL from the pre-Beagle VCF for the Beagle genotypes
    # do not use "--variant:beagle" to name your vcf file as GATK would think it's in Beagle format
    outputFile = File(os.path.join(outputDirJob.folder, '%s.beagled.withPL.vcf' % (vcfBaseFname)))
    combineBeagleAndPreBeagleVariantsJob = self.addGATKJob(
        executable=self.CombineBeagleAndPreBeagleVariantsJava,
        GenomeAnalysisTKJar=self.GenomeAnalysisTKJar,
        GATKAnalysisType="CombineBeagleAndPreBeagleVariants",
        inputFile=None, inputArgumentOption=None,
        refFastaFList=refFastaFList,
        inputFileList=None, argumentForEachFileInInputFileList="--variant",
        interval=None, outputFile=outputFile, outputArgumentOption="--out",
        frontArgumentList=None, extraArguments=None,
        extraArgumentList=["--variant:first", beagleJob.output, "--variant:second", inputFile,
            "-genotypeMergeOptions PRIORITIZE", "-priority first,second"],
        extraOutputLs=None,
        extraDependentInputLs=[inputFile] + tabixJob.outputLs,
        parentJobLs=[beagleJob, tabixJob] + parentJobLs, transferOutput=False,
        no_of_cpus=None,
        key2ObjectForJob=None,
        job_max_memory=scaleJobProperty(4000, 2000, 4000),
        walltime=scaleJobProperty(60, 60, 600))

    # the final genotype job
    returnData.refineGenotypeJob = combineBeagleAndPreBeagleVariantsJob
    # attached so that it could be used by downstream jobs
    returnData.refineGenotypeJob.intervalData = intervalData
    return returnData