本文整理汇总了 Python 中 DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport.setJobStatus 方法的典型用法代码示例。如果您正苦于以下问题:Python JobReport.setJobStatus 方法的具体用法?Python JobReport.setJobStatus 怎么用?Python JobReport.setJobStatus 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport 的用法示例。
在下文中一共展示了JobReport.setJobStatus方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __rescheduleFailedJob
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
def __rescheduleFailedJob(self, jobID, message, stop=True):
    """
    Set Job Status to "Rescheduled" and issue a reschedule command to the Job Manager.

    :param jobID: identifier of the job; converted with int() for the JobReport
                  and passed unchanged to the JobManager
    :param message: description of the failure; logged and reported as the
                    application status accompanying the "Rescheduled" status
    :param stop: forwarded unchanged as the second argument of self.__finish
    :return: the value returned by self.__finish
    """
    self.log.warn('Failure during %s' % (message))

    jobManager = RPCClient('WorkloadManagement/JobManager')
    # The report is labelled with the agent name and the local site name.
    jobReport = JobReport(int(jobID), 'JobAgent@%s' % self.siteName)

    # Setting a job parameter does not help since the job will be rescheduled,
    # instead set the status with the cause and then another status showing the
    # reschedule operation.
    jobReport.setJobStatus(status='Rescheduled',
                           application=message,
                           sendFlag=True)

    self.log.info('Job will be rescheduled')
    result = jobManager.rescheduleJob(jobID)
    if not result['OK']:
        # The reschedule request itself failed: log the reason and finish
        # with a message that distinguishes this from a successful reschedule.
        self.log.error(result['Message'])
        return self.__finish('Problem Rescheduling Job', stop)

    self.log.info('Job Rescheduled %s' % (jobID))
    return self.__finish('Job Rescheduled', stop)
示例2: rescheduleFailedJob
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
def rescheduleFailedJob(jobID, message):
    """
    Report the job as "Rescheduled", ask the JobManager to reschedule it and
    send a notification mail describing where the reschedule happened.

    This is a best-effort operation: any exception raised along the way is
    logged and swallowed, and the function always returns None.

    :param jobID: identifier of the job; converted with int() before use
    :param message: description of the failure; logged, reported as the
                    status detail and appended to the mail body
    """
    # Module-level report object, created here on demand if it is still unset.
    global jobReport
    try:
        import DIRAC

        gLogger.warn('Failure during %s' % (message))

        # Setting a job parameter does not help since the job will be rescheduled,
        # instead set the status with the cause and then another status showing the
        # reschedule operation.
        if not jobReport:
            gLogger.info('Creating a new JobReport Object')
            jobReport = JobReport(int(jobID), 'JobWrapperTemplate')

        jobReport.setApplicationStatus('Failed %s ' % message, sendFlag=False)
        jobReport.setJobStatus('Rescheduled', message, sendFlag=False)

        # We must send Job States and Parameters before it gets rescheduled
        jobReport.sendStoredStatusInfo()
        jobReport.sendStoredJobParameters()

        gLogger.info('Job will be rescheduled after exception during execution of the JobWrapper')

        jobManager = RPCClient('WorkloadManagement/JobManager')
        result = jobManager.rescheduleJob(int(jobID))
        if not result['OK']:
            # A failed reschedule is only logged; the mail below is still sent.
            gLogger.warn(result)

        # Send mail to debug errors
        mailAddress = DIRAC.alarmMail
        site = DIRAC.siteName()
        subject = 'Job rescheduled at %s' % site
        ret = systemCall(0, 'hostname')
        # Do not let a failed hostname lookup suppress the alarm mail:
        # fall back to a placeholder worker-node name instead.
        # NOTE(review): index 1 is presumably the command's stdout - confirm
        # against systemCall's return value.
        wn = ret['Value'][1] if ret['OK'] else 'Unknown'
        msg = 'Job %s rescheduled at %s, wn=%s\n' % (jobID, site, wn)
        msg += message

        # NOTE(review): the fromAddress literal looks like the placeholder left
        # by an e-mail obfuscation filter; restore the real sender address.
        NotificationClient().sendMail(mailAddress, subject, msg,
                                      fromAddress="[email protected]",
                                      localAttempt=False)
        return
    except Exception:
        gLogger.exception('JobWrapperTemplate failed to reschedule Job')
        return
示例3: execute
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
#.........这里部分代码省略.........
optimizerParams[key] = value
parameters = self.__getJDLParameters( jobJDL )
if not parameters['OK']:
self.__report( jobID, 'Failed', 'Could Not Extract JDL Parameters' )
self.log.warn( parameters['Message'] )
return self.__finish( 'JDL Problem' )
params = parameters['Value']
if not params.has_key( 'JobID' ):
msg = 'Job has not JobID defined in JDL parameters'
self.__report( jobID, 'Failed', msg )
self.log.warn( msg )
return self.__finish( 'JDL Problem' )
else:
jobID = params['JobID']
if not params.has_key( 'JobType' ):
self.log.warn( 'Job has no JobType defined in JDL parameters' )
jobType = 'Unknown'
else:
jobType = params['JobType']
if not params.has_key( 'CPUTime' ):
self.log.warn( 'Job has no CPU requirement defined in JDL parameters' )
if self.extraOptions:
params['Arguments'] = params['Arguments'] + ' ' + self.extraOptions
params['ExtraOptions'] = self.extraOptions
self.log.verbose( 'Job request successful: \n %s' % ( jobRequest['Value'] ) )
self.log.info( 'Received JobID=%s, JobType=%s' % ( jobID, jobType ) )
self.log.info( 'OwnerDN: %s JobGroup: %s' % ( ownerDN, jobGroup ) )
self.jobCount += 1
try:
jobReport = JobReport( jobID, '[email protected]%s' % self.siteName )
jobReport.setJobParameter( 'MatcherServiceTime', str( matchTime ), sendFlag = False )
if os.environ.has_key( 'BOINC_JOB_ID' ):
# Report BOINC environment
for p in ['BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName']:
jobReport.setJobParameter( p, gConfig.getValue( '/LocalSite/%s' % p, 'Unknown' ), sendFlag = False )
jobReport.setJobStatus( 'Matched', 'Job Received by Agent' )
result = self.__setupProxy( ownerDN, jobGroup )
if not result[ 'OK' ]:
return self.__rescheduleFailedJob( jobID, result[ 'Message' ], self.stopOnApplicationFailure )
if 'Value' in result and result[ 'Value' ]:
proxyChain = result[ 'Value' ]
# Save the job jdl for external monitoring
self.__saveJobJDLRequest( jobID, jobJDL )
software = self.__checkInstallSoftware( jobID, params, ceDict )
if not software['OK']:
self.log.error( 'Failed to install software for job %s' % ( jobID ) )
errorMsg = software['Message']
if not errorMsg:
errorMsg = 'Failed software installation'
return self.__rescheduleFailedJob( jobID, errorMsg, self.stopOnApplicationFailure )
self.log.debug( 'Before %sCE submitJob()' % ( self.ceName ) )
submission = self.__submitJob( jobID, params, ceDict, optimizerParams, proxyChain )
if not submission['OK']:
self.__report( jobID, 'Failed', submission['Message'] )
return self.__finish( submission['Message'] )
elif 'PayloadFailed' in submission:
# Do not keep running and do not overwrite the Payload error
return self.__finish( 'Payload execution failed with error code %s' % submission['PayloadFailed'],
self.stopOnApplicationFailure )
self.log.debug( 'After %sCE submitJob()' % ( self.ceName ) )
except Exception:
self.log.exception()
return self.__rescheduleFailedJob( jobID , 'Job processing failed with exception', self.stopOnApplicationFailure )
currentTimes = list( os.times() )
for i in range( len( currentTimes ) ):
currentTimes[i] -= self.initTimes[i]
utime, stime, cutime, cstime, _elapsed = currentTimes
cpuTime = utime + stime + cutime + cstime
result = self.timeLeftUtil.getTimeLeft( cpuTime )
if result['OK']:
self.timeLeft = result['Value']
else:
if result['Message'] != 'Current batch system is not supported':
self.timeLeftError = result['Message']
else:
if self.cpuFactor:
# if the batch system is not defined used the CPUNormalizationFactor
# defined locally
self.timeLeft = self.__getCPUTimeLeft()
scaledCPUTime = self.timeLeftUtil.getScaledCPU()['Value']
self.__setJobParam( jobID, 'ScaledCPUTime', str( scaledCPUTime - self.scaledCPUTime ) )
self.scaledCPUTime = scaledCPUTime
return S_OK( 'Job Agent cycle complete' )
示例4: execute
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
#.........这里部分代码省略.........
if 'InputSandbox' in arguments['Job']:
gJobReport.commit()
try:
result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
if not result['OK']:
gLogger.warn(result['Message'])
raise JobWrapperError(result['Message'])
except JobWrapperError:
gLogger.exception('JobWrapper failed to download input sandbox')
rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
return 1
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper raised exception while downloading input sandbox', lException=exc)
rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
return 1
else:
gLogger.verbose('Job has no InputSandbox requirement')
gJobReport.commit()
if 'InputData' in arguments['Job']:
if arguments['Job']['InputData']:
try:
result = job.resolveInputData()
if not result['OK']:
gLogger.warn(result['Message'])
raise JobWrapperError(result['Message'])
except JobWrapperError:
gLogger.exception('JobWrapper failed to resolve input data')
rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
return 1
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper raised exception while resolving input data', lException=exc)
rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
return 1
else:
gLogger.verbose('Job has a null InputData requirement:')
gLogger.verbose(arguments)
else:
gLogger.verbose('Job has no InputData requirement')
gJobReport.commit()
try:
result = job.execute(arguments)
if not result['OK']:
gLogger.error('Failed to execute job', result['Message'])
raise JobWrapperError((result['Message'], result['Errno']))
except JobWrapperError as exc:
if exc.value[1] == 0 or str(exc.value[0]) == '0':
gLogger.verbose('JobWrapper exited with status=0 after execution')
if exc.value[1] == DErrno.EWMSRESC:
gLogger.warn("Asked to reschedule job")
rescheduleResult = rescheduleFailedJob(jobID, 'JobWrapper execution', gJobReport)
job.sendJobAccounting(rescheduleResult, 'JobWrapper execution')
return 1
gLogger.exception('Job failed in execution phase')
gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
gJobReport.setJobStatus(
'Failed', 'Exception During Execution', sendFlag=False)
job.sendFailoverRequest('Failed', 'Exception During Execution')
return 1
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('Job raised exception during execution phase', lException=exc)
gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
job.sendFailoverRequest('Failed', 'Exception During Execution')
return 1
if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']:
try:
result = job.processJobOutputs()
if not result['OK']:
gLogger.warn(result['Message'])
raise JobWrapperError(result['Message'])
except JobWrapperError as exc:
gLogger.exception('JobWrapper failed to process output files')
gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
return 2
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper raised exception while processing output files', lException=exc)
gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
return 2
else:
gLogger.verbose('Job has no OutputData or OutputSandbox requirement')
try:
# Failed jobs will return 1 / successful jobs will return 0
return job.finalize()
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper raised exception during the finalization phase', lException=exc)
return 2
示例5: execute
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
#.........这里部分代码省略.........
self.__report( jobID, 'Failed', msg )
self.log.warn( msg )
return self.__finish( 'JDL Problem' )
else:
jobID = params['JobID']
if not params.has_key( 'JobType' ):
self.log.warn( 'Job has no JobType defined in JDL parameters' )
jobType = 'Unknown'
else:
jobType = params['JobType']
if not params.has_key( 'SystemConfig' ):
self.log.warn( 'Job has no system configuration defined in JDL parameters' )
systemConfig = gConfig.getValue( '/LocalSite/Architecture', '' )
self.log.info( 'Setting system config to /LocalSite/Architecture = %s since it was not specified' % systemConfig )
if not systemConfig:
self.log.warn( '/LocalSite/Architecture is not defined' )
params['SystemConfig'] = systemConfig
else:
systemConfig = params['SystemConfig']
if systemConfig.lower() == 'any':
systemConfig = gConfig.getValue( '/LocalSite/Architecture', '' )
self.log.info( 'Setting SystemConfig = /LocalSite/Architecture =',
'"%s" since it was set to "ANY" in the job description' % systemConfig )
if not systemConfig:
self.log.warn( '/LocalSite/Architecture is not defined' )
params['SystemConfig'] = systemConfig
if not params.has_key( 'MaxCPUTime' ):
self.log.warn( 'Job has no CPU requirement defined in JDL parameters' )
self.log.verbose( 'Job request successful: \n %s' % ( jobRequest['Value'] ) )
self.log.info( 'Received JobID=%s, JobType=%s, SystemConfig=%s' % ( jobID, jobType, systemConfig ) )
self.log.info( 'OwnerDN: %s JobGroup: %s' % ( ownerDN, jobGroup ) )
self.jobCount += 1
try:
jobReport = JobReport( jobID, '[email protected]%s' % self.siteName )
jobReport.setJobParameter( 'MatcherServiceTime', str( matchTime ), sendFlag = False )
if self.gridCEQueue:
jobReport.setJobParameter( 'GridCEQueue', self.gridCEQueue, sendFlag = False )
jobReport.setJobStatus( 'Matched', 'Job Received by Agent' )
# self.__setJobSite( jobID, self.siteName )
if not self.pilotInfoReportedFlag:
self.__reportPilotInfo( jobID )
result = self.__setupProxy( ownerDN, jobGroup )
if not result[ 'OK' ]:
return self.__rescheduleFailedJob( jobID, result[ 'Message' ], self.stopOnApplicationFailure )
if 'Value' in result and result[ 'Value' ]:
proxyChain = result[ 'Value' ]
# Is this necessary at all?
saveJDL = self.__saveJobJDLRequest( jobID, jobJDL )
#self.__report(jobID,'Matched','Job Prepared to Submit')
#resourceParameters = self.__getJDLParameters( resourceJDL )
#if not resourceParameters['OK']:
# return resourceParameters
#resourceParams = resourceParameters['Value']
software = self.__checkInstallSoftware( jobID, params, ceDict )
if not software['OK']:
self.log.error( 'Failed to install software for job %s' % ( jobID ) )
errorMsg = software['Message']
if not errorMsg:
errorMsg = 'Failed software installation'
return self.__rescheduleFailedJob( jobID, errorMsg, self.stopOnApplicationFailure )
self.log.verbose( 'Before %sCE submitJob()' % ( self.ceName ) )
submission = self.__submitJob( jobID, params, ceDict, optimizerParams, jobJDL, proxyChain )
if not submission['OK']:
self.__report( jobID, 'Failed', submission['Message'] )
return self.__finish( submission['Message'] )
elif 'PayloadFailed' in submission:
# Do not keep running and do not overwrite the Payload error
return self.__finish( 'Payload execution failed with error code %s' % submission['PayloadFailed'],
self.stopOnApplicationFailure )
self.log.verbose( 'After %sCE submitJob()' % ( self.ceName ) )
except Exception:
self.log.exception()
return self.__rescheduleFailedJob( jobID , 'Job processing failed with exception', self.stopOnApplicationFailure )
result = self.timeLeftUtil.getTimeLeft( 0.0 )
if result['OK']:
self.timeLeft = result['Value']
else:
if result['Message'] != 'Current batch system is not supported':
self.timeLeftError = result['Message']
else:
if self.cpuFactor:
# if the batch system is not defined used the CPUNormalizationFactor
# defined locally
self.timeLeft = self.__getCPUTimeLeft()
scaledCPUTime = self.timeLeftUtil.getScaledCPU()['Value']
self.__setJobParam( jobID, 'ScaledCPUTime', str( scaledCPUTime - self.scaledCPUTime ) )
self.scaledCPUTime = scaledCPUTime
return S_OK( 'Job Agent cycle complete' )
示例6: execute
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
#.........这里部分代码省略.........
optimizerParams = {}
for key in matcherInfo:
if key not in matcherParams:
optimizerParams[key] = matcherInfo[key]
parameters = self.__getJDLParameters(jobJDL)
if not parameters['OK']:
self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
self.log.warn(parameters['Message'])
return self.__finish('JDL Problem')
params = parameters['Value']
if 'JobID' not in params:
msg = 'Job has not JobID defined in JDL parameters'
self.__report(jobID, 'Failed', msg)
self.log.warn(msg)
return self.__finish('JDL Problem')
else:
jobID = params['JobID']
if 'JobType' not in params:
self.log.warn('Job has no JobType defined in JDL parameters')
jobType = 'Unknown'
else:
jobType = params['JobType']
if 'CPUTime' not in params:
self.log.warn('Job has no CPU requirement defined in JDL parameters')
# Job requirement for a number of processors
processors = int(params.get('NumberOfProcessors', 1))
wholeNode = 'WholeNode' in params
if self.extraOptions:
params['Arguments'] += ' ' + self.extraOptions
params['ExtraOptions'] = self.extraOptions
self.log.verbose('Job request successful: \n', jobRequest['Value'])
self.log.info('Received JobID=%s, JobType=%s' % (jobID, jobType))
self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup))
self.jobCount += 1
try:
jobReport = JobReport(jobID, '[email protected]%s' % self.siteName)
jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)
if 'BOINC_JOB_ID' in os.environ:
# Report BOINC environment
for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
jobReport.setJobParameter(thisp, gConfig.getValue('/LocalSite/%s' % thisp, 'Unknown'), sendFlag=False)
jobReport.setJobStatus('Matched', 'Job Received by Agent')
result = self.__setupProxy(ownerDN, jobGroup)
if not result['OK']:
return self.__rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
proxyChain = result.get('Value')
# Save the job jdl for external monitoring
self.__saveJobJDLRequest(jobID, jobJDL)
software = self.__checkInstallSoftware(jobID, params, ceDict)
if not software['OK']:
self.log.error('Failed to install software for job', '%s' % (jobID))
errorMsg = software['Message']
if not errorMsg:
errorMsg = 'Failed software installation'
return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)
self.log.debug('Before %sCE submitJob()' % (self.ceName))
result = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain, processors, wholeNode)
if not result['OK']:
self.__report(jobID, 'Failed', result['Message'])
return self.__finish(result['Message'])
elif 'PayloadFailed' in result:
# Do not keep running and do not overwrite the Payload error
message = 'Payload execution failed with error code %s' % result['PayloadFailed']
if self.stopOnApplicationFailure:
return self.__finish(message, self.stopOnApplicationFailure)
else:
self.log.info(message)
self.log.debug('After %sCE submitJob()' % (self.ceName))
except Exception as subExcept: # pylint: disable=broad-except
self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True)
return self.__rescheduleFailedJob(jobID, 'Job processing failed with exception', self.stopOnApplicationFailure)
# Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])
result = self.timeLeftUtil.getTimeLeft(cpuTime, processors)
if result['OK']:
self.timeLeft = result['Value']
else:
if result['Message'] != 'Current batch system is not supported':
self.timeLeftError = result['Message']
else:
# if the batch system is not defined, use the process time and the CPU normalization defined locally
self.timeLeft = self.__getCPUTimeLeft()
return S_OK('Job Agent cycle complete')
示例7: JobWrapperError
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
jobReport.commit()
try:
result = job.execute(arguments)
if not result['OK']:
gLogger.error(result['Message'])
raise JobWrapperError(result['Message'])
except Exception, x:
if str(x) == '0':
gLogger.verbose('JobWrapper exited with status=0 after execution')
pass
else:
gLogger.exception('Job failed in execution phase')
jobReport.setJobParameter('Error Message',str(x),sendFlag=False)
jobReport.setJobStatus('Failed','Exception During Execution',sendFlag=False)
job.sendFailoverRequest('Failed','Exception During Execution')
return 1
if arguments['Job'].has_key('OutputSandbox') or arguments['Job'].has_key('OutputData'):
try:
result = job.processJobOutputs(arguments)
if not result['OK']:
gLogger.warn(result['Message'])
raise JobWrapperError(result['Message'])
except Exception, x:
gLogger.exception('JobWrapper failed to process output files')
jobReport.setJobParameter('Error Message',str(x),sendFlag=False)
jobReport.setJobStatus('Failed','Uploading Job Outputs',sendFlag=False)
job.sendFailoverRequest('Failed','Uploading Job Outputs')
return 2
示例8: execute
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
def execute( arguments ):
global gJobReport
jobID = arguments['Job']['JobID']
os.environ['JOBID'] = jobID
jobID = int( jobID )
if arguments.has_key( 'WorkingDirectory' ):
wdir = os.path.expandvars( arguments['WorkingDirectory'] )
if os.path.isdir( wdir ):
os.chdir( wdir )
else:
try:
os.makedirs( wdir )
if os.path.isdir( wdir ):
os.chdir( wdir )
except Exception:
gLogger.exception( 'JobWrapperTemplate could not create working directory' )
rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory' )
return 1
gJobReport = JobReport( jobID, 'JobWrapper' )
try:
job = JobWrapper( jobID, gJobReport )
job.initialize( arguments )
except Exception as e:
gLogger.exception( 'JobWrapper failed the initialization phase', lException = e )
rescheduleResult = rescheduleFailedJob( jobID, 'Job Wrapper Initialization', gJobReport )
try:
job.sendJobAccounting( rescheduleResult, 'Job Wrapper Initialization' )
except Exception as e:
gLogger.exception( 'JobWrapper failed sending job accounting', lException = e )
return 1
if arguments['Job'].has_key( 'InputSandbox' ):
gJobReport.commit()
try:
result = job.transferInputSandbox( arguments['Job']['InputSandbox'] )
if not result['OK']:
gLogger.warn( result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception:
gLogger.exception( 'JobWrapper failed to download input sandbox' )
rescheduleResult = rescheduleFailedJob( jobID, 'Input Sandbox Download', gJobReport )
job.sendJobAccounting( rescheduleResult, 'Input Sandbox Download' )
return 1
else:
gLogger.verbose( 'Job has no InputSandbox requirement' )
gJobReport.commit()
if arguments['Job'].has_key( 'InputData' ):
if arguments['Job']['InputData']:
try:
result = job.resolveInputData()
if not result['OK']:
gLogger.warn( result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception as x:
gLogger.exception( 'JobWrapper failed to resolve input data' )
rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport )
job.sendJobAccounting( rescheduleResult, 'Input Data Resolution' )
return 1
else:
gLogger.verbose( 'Job has a null InputData requirement:' )
gLogger.verbose( arguments )
else:
gLogger.verbose( 'Job has no InputData requirement' )
gJobReport.commit()
try:
result = job.execute( arguments )
if not result['OK']:
gLogger.error( 'Failed to execute job', result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception as x:
if str( x ) == '0':
gLogger.verbose( 'JobWrapper exited with status=0 after execution' )
else:
gLogger.exception( 'Job failed in execution phase' )
gJobReport.setJobParameter( 'Error Message', str( x ), sendFlag = False )
gJobReport.setJobStatus( 'Failed', 'Exception During Execution', sendFlag = False )
job.sendFailoverRequest( 'Failed', 'Exception During Execution' )
return 1
if arguments['Job'].has_key( 'OutputSandbox' ) or arguments['Job'].has_key( 'OutputData' ):
try:
result = job.processJobOutputs( arguments )
if not result['OK']:
gLogger.warn( result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception as x:
gLogger.exception( 'JobWrapper failed to process output files' )
gJobReport.setJobParameter( 'Error Message', str( x ), sendFlag = False )
gJobReport.setJobStatus( 'Failed', 'Uploading Job Outputs', sendFlag = False )
job.sendFailoverRequest( 'Failed', 'Uploading Job Outputs' )
return 2
#.........这里部分代码省略.........
示例9: execute
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
#.........这里部分代码省略.........
if not params.has_key("SystemConfig"):
self.log.warn("Job has no system configuration defined in JDL parameters")
systemConfig = gConfig.getValue("/LocalSite/Architecture", "")
self.log.info(
"Setting system config to /LocalSite/Architecture = %s since it was not specified" % systemConfig
)
if not systemConfig:
self.log.warn("/LocalSite/Architecture is not defined")
params["SystemConfig"] = systemConfig
else:
systemConfig = params["SystemConfig"]
if systemConfig.lower() == "any":
systemConfig = gConfig.getValue("/LocalSite/Architecture", "")
self.log.info(
"Setting SystemConfig = /LocalSite/Architecture =",
'"%s" since it was set to "ANY" in the job description' % systemConfig,
)
if not systemConfig:
self.log.warn("/LocalSite/Architecture is not defined")
params["SystemConfig"] = systemConfig
if not params.has_key("CPUTime"):
self.log.warn("Job has no CPU requirement defined in JDL parameters")
self.log.verbose("Job request successful: \n %s" % (jobRequest["Value"]))
self.log.info("Received JobID=%s, JobType=%s, SystemConfig=%s" % (jobID, jobType, systemConfig))
self.log.info("OwnerDN: %s JobGroup: %s" % (ownerDN, jobGroup))
self.jobCount += 1
try:
jobReport = JobReport(jobID, "[email protected]%s" % self.siteName)
jobReport.setJobParameter("MatcherServiceTime", str(matchTime), sendFlag=False)
if self.gridCEQueue:
jobReport.setJobParameter("GridCEQueue", self.gridCEQueue, sendFlag=False)
if os.environ.has_key("BOINC_JOB_ID"):
# Report BOINC environment
for p in ["BoincUserID", "BoincHostID", "BoincHostPlatform", "BoincHostName"]:
jobReport.setJobParameter(p, gConfig.getValue("/LocalSite/%s" % p, "Unknown"), sendFlag=False)
jobReport.setJobStatus("Matched", "Job Received by Agent")
# self.__setJobSite( jobID, self.siteName )
if not self.pilotInfoReportedFlag:
self.__reportPilotInfo(jobID)
result = self.__setupProxy(ownerDN, jobGroup)
if not result["OK"]:
return self.__rescheduleFailedJob(jobID, result["Message"], params, self.stopOnApplicationFailure)
if "Value" in result and result["Value"]:
proxyChain = result["Value"]
software = self.__checkInstallSoftware(jobID, params, ceDict)
if not software["OK"]:
self.log.error("Failed to install software for job %s" % (jobID))
errorMsg = software["Message"]
if not errorMsg:
errorMsg = "Failed software installation"
return self.__rescheduleFailedJob(jobID, errorMsg, params, self.stopOnApplicationFailure)
self.log.verbose("Before %sCE submitJob()" % (self.ceName))
submission = self.__submitJob(jobID, params, ceDict, optimizerParams, jobJDL, proxyChain)
if not submission["OK"]:
self.__report(jobID, "Failed", submission["Message"])
return self.__finish(submission["Message"])
elif "PayloadFailed" in submission:
# Do not keep running and do not overwrite the Payload error
return self.__finish(
"Payload execution failed with error code %s" % submission["PayloadFailed"],
self.stopOnApplicationFailure,
)
self.log.verbose("After %sCE submitJob()" % (self.ceName))
except Exception:
self.log.exception()
return self.__rescheduleFailedJob(
jobID, "Job processing failed with exception", params, self.stopOnApplicationFailure
)
currentTimes = list(os.times())
for i in range(len(currentTimes)):
currentTimes[i] -= self.initTimes[i]
utime, stime, cutime, cstime, elapsed = currentTimes
cpuTime = utime + stime + cutime + cstime
result = self.timeLeftUtil.getTimeLeft(cpuTime)
if result["OK"]:
self.timeLeft = result["Value"]
else:
if result["Message"] != "Current batch system is not supported":
self.timeLeftError = result["Message"]
else:
if self.cpuFactor:
# if the batch system is not defined used the CPUNormalizationFactor
# defined locally
self.timeLeft = self.__getCPUTimeLeft()
scaledCPUTime = self.timeLeftUtil.getScaledCPU()["Value"]
self.__setJobParam(jobID, "ScaledCPUTime", str(scaledCPUTime - self.scaledCPUTime))
self.scaledCPUTime = scaledCPUTime
return S_OK("Job Agent cycle complete")
示例10: execute
# 需要导入模块: from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport import setJobStatus [as 别名]
#.........这里部分代码省略.........
for key in matcherInfo:
if key not in matcherParams:
optimizerParams[key] = matcherInfo[key]
parameters = self.__getJDLParameters(jobJDL)
if not parameters["OK"]:
self.__report(jobID, "Failed", "Could Not Extract JDL Parameters")
self.log.warn(parameters["Message"])
return self.__finish("JDL Problem")
params = parameters["Value"]
if "JobID" not in params:
msg = "Job has not JobID defined in JDL parameters"
self.__report(jobID, "Failed", msg)
self.log.warn(msg)
return self.__finish("JDL Problem")
else:
jobID = params["JobID"]
if "JobType" not in params:
self.log.warn("Job has no JobType defined in JDL parameters")
jobType = "Unknown"
else:
jobType = params["JobType"]
if "CPUTime" not in params:
self.log.warn("Job has no CPU requirement defined in JDL parameters")
if self.extraOptions:
params["Arguments"] += " " + self.extraOptions
params["ExtraOptions"] = self.extraOptions
self.log.verbose("Job request successful: \n", jobRequest["Value"])
self.log.info("Received JobID=%s, JobType=%s" % (jobID, jobType))
self.log.info("OwnerDN: %s JobGroup: %s" % (ownerDN, jobGroup))
self.jobCount += 1
try:
jobReport = JobReport(jobID, "[email protected]%s" % self.siteName)
jobReport.setJobParameter("MatcherServiceTime", str(matchTime), sendFlag=False)
if "BOINC_JOB_ID" in os.environ:
# Report BOINC environment
for p in ("BoincUserID", "BoincHostID", "BoincHostPlatform", "BoincHostName"):
jobReport.setJobParameter(p, gConfig.getValue("/LocalSite/%s" % p, "Unknown"), sendFlag=False)
jobReport.setJobStatus("Matched", "Job Received by Agent")
result = self.__setupProxy(ownerDN, jobGroup)
if not result["OK"]:
return self.__rescheduleFailedJob(jobID, result["Message"], self.stopOnApplicationFailure)
proxyChain = result.get("Value")
# Save the job jdl for external monitoring
self.__saveJobJDLRequest(jobID, jobJDL)
software = self.__checkInstallSoftware(jobID, params, ceDict)
if not software["OK"]:
self.log.error("Failed to install software for job", "%s" % (jobID))
errorMsg = software["Message"]
if not errorMsg:
errorMsg = "Failed software installation"
return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)
self.log.debug("Before %sCE submitJob()" % (self.ceName))
submission = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain)
if not submission["OK"]:
self.__report(jobID, "Failed", submission["Message"])
return self.__finish(submission["Message"])
elif "PayloadFailed" in submission:
# Do not keep running and do not overwrite the Payload error
message = "Payload execution failed with error code %s" % submission["PayloadFailed"]
if self.stopOnApplicationFailure:
return self.__finish(message, self.stopOnApplicationFailure)
else:
self.log.info(message)
self.log.debug("After %sCE submitJob()" % (self.ceName))
except Exception:
self.log.exception()
return self.__rescheduleFailedJob(
jobID, "Job processing failed with exception", self.stopOnApplicationFailure
)
# Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])
result = self.timeLeftUtil.getTimeLeft(cpuTime)
if result["OK"]:
self.timeLeft = result["Value"]
else:
if result["Message"] != "Current batch system is not supported":
self.timeLeftError = result["Message"]
else:
# if the batch system is not defined, use the process time and the CPU normalization defined locally
self.timeLeft = self.__getCPUTimeLeft()
scaledCPUTime = self.timeLeftUtil.getScaledCPU()
self.__setJobParam(jobID, "ScaledCPUTime", str(scaledCPUTime - self.scaledCPUTime))
self.scaledCPUTime = scaledCPUTime
return S_OK("Job Agent cycle complete")