This page collects typical usage examples of the Python method pandatools.Client.addDataset. If you are wondering what Client.addDataset does, how to call it, or what working code looks like, the curated examples below may help. You can also look at further usage examples of pandatools.Client, which this method belongs to.
Four code examples of the Client.addDataset method are shown below, sorted by popularity by default.
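Before the full examples, here is a minimal sketch of the basic call pattern, assuming a working pandatools/PanDA client setup; the dataset name and site name are hypothetical placeholders, and the arguments simply mirror the ones used in the examples that follow.

# Minimal sketch: register an output dataset at the DDM location of a PanDA site.
# Assumptions: pandatools is installed and configured; 'ANALY_EXAMPLE' and the
# dataset name are placeholders, not real entries.
from pandatools import Client

datasetName = 'user.example.test.outDS'   # hypothetical dataset name
site = 'ANALY_EXAMPLE'                    # hypothetical PanDA site name

# look up the DDM storage location associated with the chosen site
outDsLocation = Client.PandaSites[site]['ddm']

# register the dataset there; the second positional argument is the verbose flag,
# and keywords such as dsCheck, allowProdDisk and dsExist appear in Examples 3 and 4
Client.addDataset(datasetName, False, location=outDsLocation)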
Example 1: master_prepare
# Required import: from pandatools import Client [as alias]
# Or: from pandatools.Client import addDataset [as alias]
def master_prepare(self, app, appconfig):
    '''Prepare the master job'''

    from pandatools import Client
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec

    job = app._getParent()
    logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.'))

    # set chirp variables
    if configPanda['chirpconfig'] or configPanda['chirpserver']:
        setChirpVariables()

    # Pack inputsandbox
    inputsandbox = 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null')
    inpw = job.getInputWorkspace()
    # add user script to inputsandbox
    if hasattr(job.application.exe, "name"):
        if not job.application.exe in job.inputsandbox:
            job.inputsandbox.append(job.application.exe)

    for fname in [f.name for f in job.inputsandbox]:
        fname.rstrip(os.sep)
        path = fname[:fname.rfind(os.sep)]
        f = fname[fname.rfind(os.sep)+1:]
        rc, output = commands.getstatusoutput('tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
        if rc:
            logger.error('Packing inputsandbox failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError('Packing inputsandbox failed.')

    if len(job.inputsandbox) > 0:
        rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox)))
        if rc:
            logger.error('Packing inputsandbox failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError('Packing inputsandbox failed.')
        inputsandbox += ".gz"
    else:
        inputsandbox = None

    # Upload Inputsandbox
    if inputsandbox:
        logger.debug('Uploading source tarball ...')
        uploadSources(inpw.getPath(), os.path.basename(inputsandbox))
        self.inputsandbox = inputsandbox
    else:
        self.inputsandbox = None

    # input dataset
    if job.inputdata:
        if job.inputdata._name != 'DQ2Dataset':
            raise ApplicationConfigurationError('PANDA application supports only DQ2Datasets')

    # run brokerage here if not splitting
    if not job.splitter:
        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
        runPandaBrokerage(job)
    elif job.splitter._name not in ['DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask']:
        raise ApplicationConfigurationError('Panda splitter must be DQ2JobSplitter or ArgSplitter')

    if job.backend.site == 'AUTO':
        raise ApplicationConfigurationError('site is still AUTO after brokerage!')

    # output dataset
    if job.outputdata:
        if job.outputdata._name != 'DQ2OutputDataset':
            raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
    else:
        logger.info('Adding missing DQ2OutputDataset')
        job.outputdata = DQ2OutputDataset()

    job.outputdata.datasetname, outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)

    self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

    try:
        Client.addDataset(job.outputdata.datasetname, False, location=self.outDsLocation)
        logger.info('Output dataset %s registered at %s' % (job.outputdata.datasetname, self.outDsLocation))
        dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation)
    except exceptions.SystemExit:
        raise BackendError('Panda', 'Exception in Client.addDataset %s: %s %s' % (job.outputdata.datasetname, sys.exc_info()[0], sys.exc_info()[1]))

    # handle the libds
    if job.backend.libds:
        self.libDataset = job.backend.libds
        self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
        self.library = self.fileBO.lfn
    elif job.backend.bexec:
        self.libDataset = job.outputdata.datasetname + '.lib'
        self.library = '%s.tgz' % self.libDataset
        try:
            Client.addDataset(self.libDataset, False, location=self.outDsLocation)
            dq2_set_dataset_lifetime(self.libDataset, location=self.outDsLocation)
            logger.info('Lib dataset %s registered at %s' % (self.libDataset, self.outDsLocation))
        except exceptions.SystemExit:
            raise BackendError('Panda', 'Exception in Client.addDataset %s: %s %s' % (self.libDataset, sys.exc_info()[0], sys.exc_info()[1]))

    # collect extOutFiles
    self.extOutFile = []
#......... part of the code omitted here .........
Example 2: master_prepare
# Required import: from pandatools import Client [as alias]
# Or: from pandatools.Client import addDataset [as alias]
#......... part of the code omitted here .........
##         if cloudSite==eachCloud:
##             cloud=cloudID
##             outclouds.append(cloud)
##             break
##     print outclouds
##     # finally, matching with user's wishes
##     if len(outclouds)>0:
##         if not job.backend.requirements.cloud: # no user wish, update
##             job.backend.requirements.cloud=outclouds[0]
##         else:
##             try:
##                 assert job.backend.requirements.cloud in outclouds
##             except:
##                 raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))
from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
runPandaBrokerage(job)
if job.backend.site == 'AUTO':
    raise ApplicationConfigurationError(None, 'site is still AUTO after brokerage!')

# output dataset preparation and registration
try:
    outDsLocation = Client.PandaSites[job.backend.site]['ddm']
except:
    raise ApplicationConfigurationError(None, "Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site)

if not app.dryrun:
    for outtype in app.outputpaths.keys():
        dset = string.replace(app.outputpaths[outtype], "/", ".")
        dset = dset[1:]
        # dataset registration must be done only once.
        print "registering output dataset %s at %s" % (dset, outDsLocation)
        try:
            Client.addDataset(dset, False, location=outDsLocation)
            dq2_set_dataset_lifetime(dset, location=outDsLocation)
        except:
            raise ApplicationConfigurationError(None, "Fail to create output dataset %s. Aborting" % dset)
    # extend registration to build job lib dataset:
    print "registering output dataset %s at %s" % (self.libDataset, outDsLocation)
    try:
        Client.addDataset(self.libDataset, False, location=outDsLocation)
        dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
    except:
        raise ApplicationConfigurationError(None, "Fail to create output dataset %s. Aborting" % self.libDataset)

###
cacheVer = "-AtlasProduction_" + str(app.prod_release)

logger.debug("master job submit?")
self.outsite = job.backend.site
if app.se_name and app.se_name != "none" and not self.outsite:
    self.outsite = app.se_name

# create build job
jspec = JobSpec()
jspec.jobDefinitionID = job.id
jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel
jspec.homepackage = 'AnalysisTransforms' + cacheVer  # + nightVer
# common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes
# care of all inputs for the real jobs (in prepare())
jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB
jspec.destinationDBlock = self.libDataset
jspec.destinationSE = job.backend.site
jspec.prodSourceLabel = 'panda'
jspec.assignedPriority = 2000
jspec.computingSite = job.backend.site
jspec.cloud = job.backend.requirements.cloud
# jspec.jobParameters = self.args not known yet
jspec.jobParameters = '-o %s' % (self.library)
if app.userarea:
    print app.userarea
    jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea))
else:
    jspec.jobParameters += ' -i %s' % (sources)
jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)

matchURL = re.search('(http.*://[^/]+)/', Client.baseURLSSL)
if matchURL:
    jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

fout = FileSpec()
fout.lfn = self.library
fout.type = 'output'
fout.dataset = self.libDataset
fout.destinationDBlock = self.libDataset
jspec.addFile(fout)

flog = FileSpec()
flog.lfn = '%s.log.tgz' % self.libDataset
flog.type = 'log'
flog.dataset = self.libDataset
flog.destinationDBlock = self.libDataset
jspec.addFile(flog)

#print "MASTER JOB DETAILS:", jspec.jobParameters

return jspec
Example 3: prepare
# Required import: from pandatools import Client [as alias]
# Or: from pandatools.Client import addDataset [as alias]
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare the specific aspect of each subjob.
    Returns: subjobconfig list of objects understood by backends."""

    from pandatools import Client
    from pandatools import AthenaUtils
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec
    from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
    from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs

    # make sure we have the correct siteType
    refreshPandaSpecs()

    job = app._getParent()
    masterjob = job._getRoot()

    logger.debug('ProdTransPandaRTHandler prepare called for %s',
                 job.getFQID('.'))

    job.backend.actualCE = job.backend.site
    job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud']

    # check that the site is in a submit-able status
    if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
        allowed_sites = job.backend.list_ddm_sites()

    try:
        outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        tmpDsExist = False
        if (configPanda['processingType'].startswith('gangarobot') or
                configPanda['processingType'].startswith('hammercloud')):
            #if Client.getDatasets(job.outputdata.datasetname):
            if getDatasets(job.outputdata.datasetname):
                tmpDsExist = True
                logger.info('Re-using output dataset %s' % job.outputdata.datasetname)
        if not configPanda['specialHandling'] == 'ddm:rucio' and not configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'):
            Client.addDataset(job.outputdata.datasetname, False, location=outDsLocation, allowProdDisk=True, dsExist=tmpDsExist)
        logger.info('Output dataset %s registered at %s' % (job.outputdata.datasetname, outDsLocation))
        dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
    except exceptions.SystemExit:
        raise BackendError('Panda', 'Exception in adding dataset %s: %s %s' % (job.outputdata.datasetname, sys.exc_info()[0], sys.exc_info()[1]))

    # JobSpec.
    jspec = JobSpec()
    jspec.currentPriority = app.priority
    jspec.jobDefinitionID = masterjob.id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.coreCount = app.core_count
    jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
    jspec.homepackage = app.home_package
    jspec.transformation = app.transformation
    jspec.destinationDBlock = job.outputdata.datasetname
    if job.outputdata.location:
        jspec.destinationSE = job.outputdata.location
    else:
        jspec.destinationSE = job.backend.site
    if job.inputdata:
        jspec.prodDBlock = job.inputdata.dataset[0]
    else:
        jspec.prodDBlock = 'NULL'
    if app.prod_source_label:
        jspec.prodSourceLabel = app.prod_source_label
    else:
        jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
    jspec.processingType = configPanda['processingType']
    jspec.specialHandling = configPanda['specialHandling']
    jspec.computingSite = job.backend.site
    jspec.cloud = job.backend.requirements.cloud
    jspec.cmtConfig = app.atlas_cmtconfig

    if app.dbrelease == 'LATEST':
        try:
            latest_dbrelease = getLatestDBReleaseCaching()
        except:
            from pandatools import Client
            latest_dbrelease = Client.getLatestDBRelease()
        m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
        if m:
            self.dbrelease_dataset = m.group(1)
            self.dbrelease = m.group(2)
        else:
            raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually.")
    else:
        self.dbrelease_dataset = app.dbrelease_dataset
        self.dbrelease = app.dbrelease

    jspec.jobParameters = app.job_parameters

    if self.dbrelease:
        if self.dbrelease == 'current':
            jspec.jobParameters += ' --DBRelease=current'
        else:
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
            else:
                jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
            dbspec = FileSpec()
            dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
            dbspec.dataset = self.dbrelease_dataset
            dbspec.prodDBlock = jspec.prodDBlock
            dbspec.type = 'input'
            jspec.addFile(dbspec)
#......... part of the code omitted here .........
Example 4: retry
# Required import: from pandatools import Client [as alias]
# Or: from pandatools.Client import addDataset [as alias]
#......... part of the code omitted here .........
if wrongSetup:
    errMsg += 'You need to have the same runtime env as before since all job spec need to be re-created to send jobs to a new site. '
    errMsg += 'Please setup Athena correctly and restart pbook'
    tmpLog.error(errMsg)
    return

# test mode
if noSubmit:
    continue

# invoke pathena/prun to send job to new site
if (newSite or newOpts != {}) and retryJobs[0].processingType != 'usermerge':
    # set parent jobID and jobsetID
    newOpts['provenanceID'] = retryJobs[0].jobExecutionID
    newOpts['panda_parentJobsetID'] = retryJobs[0].sourceSite
    tmpLog.info("Constructing job spec again to be sent to another site ...")
    comStat = PsubUtils.execWithModifiedParams(retryJobs, newOpts, self.verbose, newSite)
    if comStat == 0:
        # update database
        time.sleep(2)
        self.sync()
    else:
        tmpLog.error("Failed to submit jobs to Panda server")
    return

# register datasets
tmpOutDsLocation = Client.PandaSites[retryJobs[-1].computingSite]['ddm']
addedDataset = []
shadowDSname = None
for tmpFile in retryJobs[-1].Files:
    if tmpFile.type in ['output', 'log'] and tmpFile.dataset.endswith('/'):
        # add shadow
        """
        removed shadow
        if shadowDSname == None and tmpFile.type == 'log':
            shadowDSname = "%s%s" % (tmpFile.destinationDBlock,Client.suffixShadow)
            Client.addDataset(shadowDSname,self.verbose)
        """
        # add datasets
        if not tmpFile.destinationDBlock in addedDataset:
            # create dataset
            Client.addDataset(
                tmpFile.destinationDBlock,
                self.verbose,
                location=tmpOutDsLocation,
                dsCheck=False)
            # add to container
            Client.addDatasetsToContainer(
                tmpFile.dataset,
                [tmpFile.destinationDBlock],
                self.verbose)
            # append
            addedDataset.append(tmpFile.destinationDBlock)

# register libDS
if retryBuild and newLibDS != None:
    Client.addDataset(
        newLibDS,
        self.verbose,
        location=tmpOutDsLocation,
        dsCheck=False)

# submit
tmpLog.info("Submitting job ...")
status, out = Client.submitJobs(retryJobs, verbose=self.verbose)
if out == None or status != 0:
    tmpLog.error(status)
    tmpLog.error(out)
    tmpLog.error("Failed to submit jobs to Panda server")
    return

# update database