当前位置: 首页>>代码示例>>Python>>正文


Python Client.addDataset方法代码示例

本文整理汇总了Python中pandatools.Client.addDataset方法的典型用法代码示例。如果您正苦于以下问题:Python Client.addDataset方法的具体用法?Python Client.addDataset怎么用?Python Client.addDataset使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandatools.Client的用法示例。


在下文中一共展示了Client.addDataset方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: master_prepare

# 需要导入模块: from pandatools import Client [as 别名]
# 或者: from pandatools.Client import addDataset [as 别名]
    def master_prepare(self,app,appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.')) 

        # set chirp variables
        if configPanda['chirpconfig'] or configPanda['chirpserver']:
            setChirpVariables()

#       Pack inputsandbox
        inputsandbox = 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null') 
        inpw = job.getInputWorkspace()
        # add user script to inputsandbox
        if hasattr(job.application.exe, "name"):
            if not job.application.exe in job.inputsandbox:
                job.inputsandbox.append(job.application.exe)

        for fname in [f.name for f in job.inputsandbox]:
            fname.rstrip(os.sep)
            path = fname[:fname.rfind(os.sep)]
            f = fname[fname.rfind(os.sep)+1:]
            rc, output = commands.getstatusoutput('tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
        if len(job.inputsandbox) > 0:
            rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox)))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
            inputsandbox += ".gz"
        else:
            inputsandbox = None

#       Upload Inputsandbox
        if inputsandbox:
            logger.debug('Uploading source tarball ...')
            uploadSources(inpw.getPath(),os.path.basename(inputsandbox))
            self.inputsandbox = inputsandbox
        else:
            self.inputsandbox = None

#       input dataset
        if job.inputdata:
            if job.inputdata._name != 'DQ2Dataset':
                raise ApplicationConfigurationError('PANDA application supports only DQ2Datasets')

        # run brokerage here if not splitting
        if not job.splitter:
            from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
            runPandaBrokerage(job)
        elif job.splitter._name not in ['DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask']:
            raise ApplicationConfigurationError('Panda splitter must be DQ2JobSplitter or ArgSplitter')
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError('site is still AUTO after brokerage!')

#       output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        job.outputdata.datasetname,outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)

        self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

        try:
            Client.addDataset(job.outputdata.datasetname,False,location=self.outDsLocation)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,self.outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))

        # handle the libds
        if job.backend.libds:
            self.libDataset = job.backend.libds
            self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
            self.library = self.fileBO.lfn
        elif job.backend.bexec:
            self.libDataset = job.outputdata.datasetname+'.lib'
            self.library = '%s.tgz' % self.libDataset
            try:
                Client.addDataset(self.libDataset,False,location=self.outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, location=self.outDsLocation)
                logger.info('Lib dataset %s registered at %s'%(self.libDataset,self.outDsLocation))
            except exceptions.SystemExit:
                raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(self.libDataset,sys.exc_info()[0],sys.exc_info()[1]))

        # collect extOutFiles
        self.extOutFile = []
#.........这里部分代码省略.........
开发者ID:Erni1619,项目名称:ganga,代码行数:103,代码来源:ExecutablePandaRTHandler.py

示例2: master_prepare

# 需要导入模块: from pandatools import Client [as 别名]
# 或者: from pandatools.Client import addDataset [as 别名]

#.........这里部分代码省略.........
##                     if cloudSite==eachCloud:
##                         cloud=cloudID
##                         outclouds.append(cloud)
##                         break
                    
##             print outclouds
##             # finally, matching with user's wishes
##             if len(outclouds)>0:
##                 if not job.backend.requirements.cloud: # no user wish, update
##                     job.backend.requirements.cloud=outclouds[0]
##                 else:
##                     try:
##                         assert job.backend.requirements.cloud in outclouds
##                     except:
##                         raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))
                                                            
        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
        
        runPandaBrokerage(job)
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(None,'site is still AUTO after brokerage!')

        # output dataset preparation and registration
        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        except:
            raise ApplicationConfigurationError(None,"Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site)
        if not app.dryrun:
            for outtype in app.outputpaths.keys():
                dset=string.replace(app.outputpaths[outtype],"/",".")
                dset=dset[1:]
                # dataset registration must be done only once.
                print "registering output dataset %s at %s" % (dset,outDsLocation)
                try:
                    Client.addDataset(dset,False,location=outDsLocation)
                    dq2_set_dataset_lifetime(dset, location=outDsLocation)
                except:
                    raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % dset)
            # extend registration to build job lib dataset:
            print "registering output dataset %s at %s" % (self.libDataset,outDsLocation)

            try:
                Client.addDataset(self.libDataset,False,location=outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
            except:
                raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % self.libDataset)


        ###
        cacheVer = "-AtlasProduction_" + str(app.prod_release)
            
        logger.debug("master job submit?")
        self.outsite=job.backend.site
        if app.se_name and app.se_name != "none" and not self.outsite:
            self.outsite=app.se_name

       
        #       create build job
        jspec = JobSpec()
        jspec.jobDefinitionID   = job.id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        jspec.homepackage       = 'AnalysisTransforms'+cacheVer#+nightVer
        jspec.transformation    = '%s/buildJob-00-00-03' % Client.baseURLSUB # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare()
        jspec.destinationDBlock = self.libDataset
        jspec.destinationSE     = job.backend.site
        jspec.prodSourceLabel   = 'panda'
        jspec.assignedPriority  = 2000
        jspec.computingSite     = job.backend.site
        jspec.cloud             = job.backend.requirements.cloud
#        jspec.jobParameters     = self.args not known yet
        jspec.jobParameters     = '-o %s' % (self.library)
        if app.userarea:
            print app.userarea
            jspec.jobParameters     += ' -i %s' % (os.path.basename(app.userarea))
        else:
            jspec.jobParameters     += ' -i %s' % (sources)
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
        
        matchURL = re.search('(http.*://[^/]+)/',Client.baseURLSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

        fout = FileSpec()
        fout.lfn  = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)
        #print "MASTER JOB DETAILS:",jspec.jobParameters

        return jspec
开发者ID:MannyMoo,项目名称:ganga,代码行数:104,代码来源:AthenaMCPandaRTHandler.py

示例3: prepare

# 需要导入模块: from pandatools import Client [as 别名]
# 或者: from pandatools.Client import addDataset [as 别名]
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the specific aspec of each subjob.
           Returns: subjobconfig list of objects understood by backends."""

        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
        from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs
        
        # make sure we have the correct siteType
        refreshPandaSpecs()

        job = app._getParent()
        masterjob = job._getRoot()

        logger.debug('ProdTransPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        job.backend.actualCE = job.backend.site
        job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud']

        # check that the site is in a submit-able status
        if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
            allowed_sites = job.backend.list_ddm_sites()

        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
            tmpDsExist = False
            if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')):
                #if Client.getDatasets(job.outputdata.datasetname):
                if getDatasets(job.outputdata.datasetname):
                    tmpDsExist = True
                    logger.info('Re-using output dataset %s'%job.outputdata.datasetname)
            if not configPanda['specialHandling']=='ddm:rucio' and not  configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'):
                Client.addDataset(job.outputdata.datasetname,False,location=outDsLocation,allowProdDisk=True,dsExist=tmpDsExist)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in adding dataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))
        
        # JobSpec.
        jspec = JobSpec()
        jspec.currentPriority = app.priority
        jspec.jobDefinitionID = masterjob.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.coreCount = app.core_count
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
        jspec.homepackage = app.home_package
        jspec.transformation = app.transformation
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = job.backend.site
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        if app.prod_source_label:
            jspec.prodSourceLabel = app.prod_source_label
        else:
            jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.specialHandling = configPanda['specialHandling']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.cmtConfig = app.atlas_cmtconfig
        if app.dbrelease == 'LATEST':
            try:
                latest_dbrelease = getLatestDBReleaseCaching()
            except:
                from pandatools import Client
                latest_dbrelease = Client.getLatestDBRelease()
            m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
            if m:
                self.dbrelease_dataset = m.group(1)
                self.dbrelease = m.group(2)
            else:
                raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually.")
        else:
            self.dbrelease_dataset = app.dbrelease_dataset
            self.dbrelease = app.dbrelease
        jspec.jobParameters = app.job_parameters

        if self.dbrelease:
            if self.dbrelease == 'current':
                jspec.jobParameters += ' --DBRelease=current' 
            else:
                if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                    jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                else:
                    jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                dbspec = FileSpec()
                dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
                dbspec.dataset = self.dbrelease_dataset
                dbspec.prodDBlock = jspec.prodDBlock
                dbspec.type = 'input'
                jspec.addFile(dbspec)
#.........这里部分代码省略.........
开发者ID:VladimirRomanovsky,项目名称:ganga,代码行数:103,代码来源:ProdTransPandaRTHandler.py

示例4: retry

# 需要导入模块: from pandatools import Client [as 别名]
# 或者: from pandatools.Client import addDataset [as 别名]

#.........这里部分代码省略.........
                 if wrongSetup:    
                     errMsg += 'You need to have the same runtime env as before since all job spec need to be re-created to send jobs to a new site. '
                     errMsg += 'Please setup Athena correctly and restart pbook'                        
                     tmpLog.error(errMsg)
                     return
         # test mode
         if noSubmit:
             continue
         # invoke pathena/prun to send job to new site
         if (newSite or newOpts != {}) and retryJobs[0].processingType != 'usermerge':
             # set parent jobID and jobsetID
             newOpts['provenanceID'] = retryJobs[0].jobExecutionID
             newOpts['panda_parentJobsetID'] = retryJobs[0].sourceSite
             tmpLog.info("Constructing job spec again to be sent to another site ...")
             comStat= PsubUtils.execWithModifiedParams(retryJobs,newOpts,self.verbose,newSite)
             if comStat == 0:
                 # update database
                 time.sleep(2)
                 self.sync()
             else:
                 tmpLog.error("Failed to submit jobs to Panda server")                
             return
         # register datasets
         tmpOutDsLocation = Client.PandaSites[retryJobs[-1].computingSite]['ddm']
         addedDataset = []
         shadowDSname = None
         for tmpFile in retryJobs[-1].Files:
             if tmpFile.type in ['output','log'] and tmpFile.dataset.endswith('/'):
                 # add shadow
                 """
                 removed shadow
                 if shadowDSname == None and tmpFile.type == 'log':
                     shadowDSname = "%s%s" % (tmpFile.destinationDBlock,Client.suffixShadow)
                     Client.addDataset(shadowDSname,self.verbose)
                 """    
                 # add datasets    
                 if not tmpFile.destinationDBlock in addedDataset:
                     # create dataset
                     Client.addDataset(
                             tmpFile.destinationDBlock,
                             self.verbose,
                             location=tmpOutDsLocation,
                             dsCheck=False)
                     # add to container
                     Client.addDatasetsToContainer(
                             tmpFile.dataset,
                             [tmpFile.destinationDBlock],
                             self.verbose)
                     # append
                     addedDataset.append(tmpFile.destinationDBlock)
         # register libDS
         if retryBuild and newLibDS != None:
             Client.addDataset(
                     newLibDS,
                     self.verbose,
                     location=tmpOutDsLocation,
                     dsCheck=False)
         # submit
         tmpLog.info("Submitting job ...")            
         status,out = Client.submitJobs(retryJobs,verbose=self.verbose)
         if out == None or status != 0:
             tmpLog.error(status)
             tmpLog.error(out)
             tmpLog.error("Failed to submit jobs to Panda server")
             return
         # update database
开发者ID:PanDAWMS,项目名称:panda-client,代码行数:70,代码来源:PBookCore.py


注:本文中的pandatools.Client.addDataset方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。