本文整理匯總了 Python 中 DIRAC.WorkloadManagementSystem.Client.JobReport.JobReport 類的典型用法代碼示例。如果您正苦於以下問題:Python JobReport 類的具體用法?Python JobReport 怎麼用?Python JobReport 使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了 JobReport 類的 14 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: __rescheduleFailedJob
def __rescheduleFailedJob( self, jobID, message, stop = True ):
    """
    Set Job Status to "Rescheduled" and issue a reschedule command to the Job Manager

    :param jobID: job identifier; converted with int() for the JobReport and
                  passed unchanged to the JobManager reschedule call
    :param message: cause of the failure; logged and stored as the application status
    :param stop: forwarded as second argument to self.__finish
    :return: the value returned by self.__finish
    """
    self.log.warn( 'Failure during %s' % ( message ) )

    jobManager = RPCClient( 'WorkloadManagement/JobManager' )
    # The reporter name has the form 'JobAgent@<site>'. The published copy of
    # this snippet had the '...@' part replaced by an e-mail obfuscation
    # placeholder, which would have reported a meaningless source name.
    jobReport = JobReport( int( jobID ), 'JobAgent@%s' % self.siteName )

    # Setting a job parameter does not help since the job will be rescheduled,
    # instead set the status with the cause and then another status showing the
    # reschedule operation.
    jobReport.setJobStatus( status = 'Rescheduled',
                            application = message,
                            sendFlag = True )

    self.log.info( 'Job will be rescheduled' )
    result = jobManager.rescheduleJob( jobID )
    if not result['OK']:
        self.log.error( result['Message'] )
        return self.__finish( 'Problem Rescheduling Job', stop )

    self.log.info( 'Job Rescheduled %s' % ( jobID ) )
    return self.__finish( 'Job Rescheduled', stop )
示例2: rescheduleFailedJob
def rescheduleFailedJob(jobID,message):
    """
    Mark the job as rescheduled, ask the JobManager to reschedule it and send
    a debugging mail to the DIRAC alarm address.

    Any exception raised on the way is logged and swallowed: this is a
    best-effort recovery path and always returns None.

    :param jobID: job identifier, converted with int() where it is sent out
    :param message: description of the failure; used in the job status and
                    appended to the mail body
    """
    try:
        import DIRAC
        global jobReport

        gLogger.warn('Failure during %s' %(message))

        # Setting a job parameter does not help since the job will be rescheduled,
        # instead set the status with the cause and then another status showing the
        # reschedule operation.
        if not jobReport:
            gLogger.info('Creating a new JobReport Object')
            jobReport = JobReport(int(jobID),'JobWrapperTemplate')

        jobReport.setApplicationStatus( 'Failed %s ' % message, sendFlag = False )
        jobReport.setJobStatus( 'Rescheduled', message, sendFlag = False )

        # We must send Job States and Parameters before it gets reschedule
        jobReport.sendStoredStatusInfo()
        jobReport.sendStoredJobParameters()

        gLogger.info('Job will be rescheduled after exception during execution of the JobWrapper')

        jobManager = RPCClient('WorkloadManagement/JobManager')
        result = jobManager.rescheduleJob(int(jobID))
        if not result['OK']:
            gLogger.warn(result)

        # Send mail to debug errors
        mailAddress = DIRAC.alarmMail
        site = DIRAC.siteName()
        subject = 'Job rescheduled at %s' % site
        ret = systemCall(0,'hostname')
        # A failed 'hostname' call carries no 'Value'; fall back to a
        # placeholder so the debug mail is still sent.
        if ret['OK']:
            wn = ret['Value'][1]
        else:
            wn = 'Unknown'
        msg = 'Job %s rescheduled at %s, wn=%s\n' % ( jobID, site, wn )
        msg += message
        # NOTE(review): the fromAddress literal below looks like an e-mail
        # obfuscation placeholder left by the page this snippet was copied
        # from; restore the real sender address before using this code.
        NotificationClient().sendMail(mailAddress,subject,msg,fromAddress="[email protected]",localAttempt=False)

        return
    except Exception:
        gLogger.exception('JobWrapperTemplate failed to reschedule Job')
        return
示例3: execute
#.........這裏部分代碼省略.........
optimizerParams[key] = value
parameters = self.__getJDLParameters( jobJDL )
if not parameters['OK']:
self.__report( jobID, 'Failed', 'Could Not Extract JDL Parameters' )
self.log.warn( parameters['Message'] )
return self.__finish( 'JDL Problem' )
params = parameters['Value']
if not params.has_key( 'JobID' ):
msg = 'Job has not JobID defined in JDL parameters'
self.__report( jobID, 'Failed', msg )
self.log.warn( msg )
return self.__finish( 'JDL Problem' )
else:
jobID = params['JobID']
if not params.has_key( 'JobType' ):
self.log.warn( 'Job has no JobType defined in JDL parameters' )
jobType = 'Unknown'
else:
jobType = params['JobType']
if not params.has_key( 'CPUTime' ):
self.log.warn( 'Job has no CPU requirement defined in JDL parameters' )
if self.extraOptions:
params['Arguments'] = params['Arguments'] + ' ' + self.extraOptions
params['ExtraOptions'] = self.extraOptions
self.log.verbose( 'Job request successful: \n %s' % ( jobRequest['Value'] ) )
self.log.info( 'Received JobID=%s, JobType=%s' % ( jobID, jobType ) )
self.log.info( 'OwnerDN: %s JobGroup: %s' % ( ownerDN, jobGroup ) )
self.jobCount += 1
try:
jobReport = JobReport( jobID, '[email protected]%s' % self.siteName )
jobReport.setJobParameter( 'MatcherServiceTime', str( matchTime ), sendFlag = False )
if os.environ.has_key( 'BOINC_JOB_ID' ):
# Report BOINC environment
for p in ['BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName']:
jobReport.setJobParameter( p, gConfig.getValue( '/LocalSite/%s' % p, 'Unknown' ), sendFlag = False )
jobReport.setJobStatus( 'Matched', 'Job Received by Agent' )
result = self.__setupProxy( ownerDN, jobGroup )
if not result[ 'OK' ]:
return self.__rescheduleFailedJob( jobID, result[ 'Message' ], self.stopOnApplicationFailure )
if 'Value' in result and result[ 'Value' ]:
proxyChain = result[ 'Value' ]
# Save the job jdl for external monitoring
self.__saveJobJDLRequest( jobID, jobJDL )
software = self.__checkInstallSoftware( jobID, params, ceDict )
if not software['OK']:
self.log.error( 'Failed to install software for job %s' % ( jobID ) )
errorMsg = software['Message']
if not errorMsg:
errorMsg = 'Failed software installation'
return self.__rescheduleFailedJob( jobID, errorMsg, self.stopOnApplicationFailure )
self.log.debug( 'Before %sCE submitJob()' % ( self.ceName ) )
submission = self.__submitJob( jobID, params, ceDict, optimizerParams, proxyChain )
if not submission['OK']:
self.__report( jobID, 'Failed', submission['Message'] )
return self.__finish( submission['Message'] )
elif 'PayloadFailed' in submission:
# Do not keep running and do not overwrite the Payload error
return self.__finish( 'Payload execution failed with error code %s' % submission['PayloadFailed'],
self.stopOnApplicationFailure )
self.log.debug( 'After %sCE submitJob()' % ( self.ceName ) )
except Exception:
self.log.exception()
return self.__rescheduleFailedJob( jobID , 'Job processing failed with exception', self.stopOnApplicationFailure )
currentTimes = list( os.times() )
for i in range( len( currentTimes ) ):
currentTimes[i] -= self.initTimes[i]
utime, stime, cutime, cstime, _elapsed = currentTimes
cpuTime = utime + stime + cutime + cstime
result = self.timeLeftUtil.getTimeLeft( cpuTime )
if result['OK']:
self.timeLeft = result['Value']
else:
if result['Message'] != 'Current batch system is not supported':
self.timeLeftError = result['Message']
else:
if self.cpuFactor:
# if the batch system is not defined used the CPUNormalizationFactor
# defined locally
self.timeLeft = self.__getCPUTimeLeft()
scaledCPUTime = self.timeLeftUtil.getScaledCPU()['Value']
self.__setJobParam( jobID, 'ScaledCPUTime', str( scaledCPUTime - self.scaledCPUTime ) )
self.scaledCPUTime = scaledCPUTime
return S_OK( 'Job Agent cycle complete' )
示例4: execute
def execute(arguments):
""" The only real function executed here
"""
global gJobReport
jobID = arguments['Job']['JobID']
os.environ['JOBID'] = jobID
jobID = int(jobID)
if 'WorkingDirectory' in arguments:
wdir = os.path.expandvars(arguments['WorkingDirectory'])
if os.path.isdir(wdir):
os.chdir(wdir)
else:
try:
os.makedirs(wdir) # this will raise an exception if wdir already exists (which is ~OK)
if os.path.isdir(wdir):
os.chdir(wdir)
except OSError as osError:
if osError.errno == errno.EEXIST and os.path.isdir(wdir):
gLogger.exception('JobWrapperTemplate found that the working directory already exists')
rescheduleResult = rescheduleFailedJob(jobID, 'Working Directory already exists')
else:
gLogger.exception('JobWrapperTemplate could not create working directory')
rescheduleResult = rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
return 1
gJobReport = JobReport(jobID, 'JobWrapper')
try:
job = JobWrapper(jobID, gJobReport)
job.initialize(arguments) # initialize doesn't return S_OK/S_ERROR
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper failed the initialization phase', lException=exc)
rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
try:
job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper failed sending job accounting', lException=exc)
return 1
if 'InputSandbox' in arguments['Job']:
gJobReport.commit()
try:
result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
if not result['OK']:
gLogger.warn(result['Message'])
raise JobWrapperError(result['Message'])
except JobWrapperError:
gLogger.exception('JobWrapper failed to download input sandbox')
rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
return 1
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper raised exception while downloading input sandbox', lException=exc)
rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
return 1
else:
gLogger.verbose('Job has no InputSandbox requirement')
gJobReport.commit()
if 'InputData' in arguments['Job']:
if arguments['Job']['InputData']:
try:
result = job.resolveInputData()
if not result['OK']:
gLogger.warn(result['Message'])
raise JobWrapperError(result['Message'])
except JobWrapperError:
gLogger.exception('JobWrapper failed to resolve input data')
rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
return 1
except Exception as exc: # pylint: disable=broad-except
gLogger.exception('JobWrapper raised exception while resolving input data', lException=exc)
rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
return 1
else:
gLogger.verbose('Job has a null InputData requirement:')
gLogger.verbose(arguments)
else:
gLogger.verbose('Job has no InputData requirement')
gJobReport.commit()
try:
result = job.execute(arguments)
if not result['OK']:
gLogger.error('Failed to execute job', result['Message'])
raise JobWrapperError((result['Message'], result['Errno']))
except JobWrapperError as exc:
if exc.value[1] == 0 or str(exc.value[0]) == '0':
gLogger.verbose('JobWrapper exited with status=0 after execution')
if exc.value[1] == DErrno.EWMSRESC:
gLogger.warn("Asked to reschedule job")
rescheduleResult = rescheduleFailedJob(jobID, 'JobWrapper execution', gJobReport)
#.........這裏部分代碼省略.........
示例5: execute
#.........這裏部分代碼省略.........
self.__report( jobID, 'Failed', msg )
self.log.warn( msg )
return self.__finish( 'JDL Problem' )
else:
jobID = params['JobID']
if not params.has_key( 'JobType' ):
self.log.warn( 'Job has no JobType defined in JDL parameters' )
jobType = 'Unknown'
else:
jobType = params['JobType']
if not params.has_key( 'SystemConfig' ):
self.log.warn( 'Job has no system configuration defined in JDL parameters' )
systemConfig = gConfig.getValue( '/LocalSite/Architecture', '' )
self.log.info( 'Setting system config to /LocalSite/Architecture = %s since it was not specified' % systemConfig )
if not systemConfig:
self.log.warn( '/LocalSite/Architecture is not defined' )
params['SystemConfig'] = systemConfig
else:
systemConfig = params['SystemConfig']
if systemConfig.lower() == 'any':
systemConfig = gConfig.getValue( '/LocalSite/Architecture', '' )
self.log.info( 'Setting SystemConfig = /LocalSite/Architecture =',
'"%s" since it was set to "ANY" in the job description' % systemConfig )
if not systemConfig:
self.log.warn( '/LocalSite/Architecture is not defined' )
params['SystemConfig'] = systemConfig
if not params.has_key( 'MaxCPUTime' ):
self.log.warn( 'Job has no CPU requirement defined in JDL parameters' )
self.log.verbose( 'Job request successful: \n %s' % ( jobRequest['Value'] ) )
self.log.info( 'Received JobID=%s, JobType=%s, SystemConfig=%s' % ( jobID, jobType, systemConfig ) )
self.log.info( 'OwnerDN: %s JobGroup: %s' % ( ownerDN, jobGroup ) )
self.jobCount += 1
try:
jobReport = JobReport( jobID, '[email protected]%s' % self.siteName )
jobReport.setJobParameter( 'MatcherServiceTime', str( matchTime ), sendFlag = False )
if self.gridCEQueue:
jobReport.setJobParameter( 'GridCEQueue', self.gridCEQueue, sendFlag = False )
jobReport.setJobStatus( 'Matched', 'Job Received by Agent' )
# self.__setJobSite( jobID, self.siteName )
if not self.pilotInfoReportedFlag:
self.__reportPilotInfo( jobID )
result = self.__setupProxy( ownerDN, jobGroup )
if not result[ 'OK' ]:
return self.__rescheduleFailedJob( jobID, result[ 'Message' ], self.stopOnApplicationFailure )
if 'Value' in result and result[ 'Value' ]:
proxyChain = result[ 'Value' ]
# Is this necessary at all?
saveJDL = self.__saveJobJDLRequest( jobID, jobJDL )
#self.__report(jobID,'Matched','Job Prepared to Submit')
#resourceParameters = self.__getJDLParameters( resourceJDL )
#if not resourceParameters['OK']:
# return resourceParameters
#resourceParams = resourceParameters['Value']
software = self.__checkInstallSoftware( jobID, params, ceDict )
if not software['OK']:
self.log.error( 'Failed to install software for job %s' % ( jobID ) )
errorMsg = software['Message']
if not errorMsg:
errorMsg = 'Failed software installation'
return self.__rescheduleFailedJob( jobID, errorMsg, self.stopOnApplicationFailure )
self.log.verbose( 'Before %sCE submitJob()' % ( self.ceName ) )
submission = self.__submitJob( jobID, params, ceDict, optimizerParams, jobJDL, proxyChain )
if not submission['OK']:
self.__report( jobID, 'Failed', submission['Message'] )
return self.__finish( submission['Message'] )
elif 'PayloadFailed' in submission:
# Do not keep running and do not overwrite the Payload error
return self.__finish( 'Payload execution failed with error code %s' % submission['PayloadFailed'],
self.stopOnApplicationFailure )
self.log.verbose( 'After %sCE submitJob()' % ( self.ceName ) )
except Exception:
self.log.exception()
return self.__rescheduleFailedJob( jobID , 'Job processing failed with exception', self.stopOnApplicationFailure )
result = self.timeLeftUtil.getTimeLeft( 0.0 )
if result['OK']:
self.timeLeft = result['Value']
else:
if result['Message'] != 'Current batch system is not supported':
self.timeLeftError = result['Message']
else:
if self.cpuFactor:
# if the batch system is not defined used the CPUNormalizationFactor
# defined locally
self.timeLeft = self.__getCPUTimeLeft()
scaledCPUTime = self.timeLeftUtil.getScaledCPU()['Value']
self.__setJobParam( jobID, 'ScaledCPUTime', str( scaledCPUTime - self.scaledCPUTime ) )
self.scaledCPUTime = scaledCPUTime
return S_OK( 'Job Agent cycle complete' )
示例6: execute
def execute ( arguments ):
    """
    Prepare a job for running: enter its working directory, build the
    JobWrapper, download the input sandbox and resolve the input data.

    The module-level gJobReport is (re)bound to a JobReport for this job.

    :param arguments: mapping with a 'Job' sub-mapping (must contain 'JobID';
                      'InputSandbox' and 'InputData' are optional) and an
                      optional 'WorkingDirectory' entry
    :return: 1 as soon as one of the steps fails (after the job has been
             handed to rescheduleFailedJob); None when every step shown here
             succeeded
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    os.environ['JOBID'] = jobID
    jobID = int( jobID )

    if 'WorkingDirectory' in arguments:
        wdir = os.path.expandvars( arguments['WorkingDirectory'] )
        if os.path.isdir( wdir ):
            os.chdir( wdir )
        else:
            try:
                os.makedirs( wdir )
                if os.path.isdir( wdir ):
                    os.chdir( wdir )
            except Exception:
                gLogger.exception( 'JobWrapperTemplate could not create working directory' )
                rescheduleFailedJob( jobID, 'Could Not Create Working Directory' )
                return 1

    gJobReport = JobReport( jobID, 'JobWrapper' )

    try:
        job = JobWrapper( jobID, gJobReport )
        job.initialize( arguments )
    except Exception:
        gLogger.exception( 'JobWrapper failed the initialization phase' )
        rescheduleResult = rescheduleFailedJob( jobID, 'Job Wrapper Initialization', gJobReport )
        # 'job' is unbound when the JobWrapper constructor itself raised, so
        # the accounting call must not be allowed to escape this handler.
        try:
            job.sendJobAccounting( rescheduleResult, 'Job Wrapper Initialization' )
        except Exception:
            gLogger.exception( 'JobWrapper failed sending job accounting' )
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox( arguments['Job']['InputSandbox'] )
            if not result['OK']:
                gLogger.warn( result['Message'] )
                raise JobWrapperError( result['Message'] )
        except Exception:
            gLogger.exception( 'JobWrapper failed to download input sandbox' )
            rescheduleResult = rescheduleFailedJob( jobID, 'Input Sandbox Download', gJobReport )
            job.sendJobAccounting( rescheduleResult, 'Input Sandbox Download' )
            return 1
    else:
        gLogger.verbose( 'Job has no InputSandbox requirement' )

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn( result['Message'] )
                    raise JobWrapperError( result['Message'] )
            except Exception:
                gLogger.exception( 'JobWrapper failed to resolve input data' )
                rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport )
                job.sendJobAccounting( rescheduleResult, 'Input Data Resolution' )
                return 1
        else:
            gLogger.verbose( 'Job has a null InputData requirement:' )
            gLogger.verbose( arguments )
示例7: execute
#.........這裏部分代碼省略.........
optimizerParams = {}
for key in matcherInfo:
if key not in matcherParams:
optimizerParams[key] = matcherInfo[key]
parameters = self.__getJDLParameters(jobJDL)
if not parameters['OK']:
self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
self.log.warn(parameters['Message'])
return self.__finish('JDL Problem')
params = parameters['Value']
if 'JobID' not in params:
msg = 'Job has not JobID defined in JDL parameters'
self.__report(jobID, 'Failed', msg)
self.log.warn(msg)
return self.__finish('JDL Problem')
else:
jobID = params['JobID']
if 'JobType' not in params:
self.log.warn('Job has no JobType defined in JDL parameters')
jobType = 'Unknown'
else:
jobType = params['JobType']
if 'CPUTime' not in params:
self.log.warn('Job has no CPU requirement defined in JDL parameters')
# Job requirement for a number of processors
processors = int(params.get('NumberOfProcessors', 1))
wholeNode = 'WholeNode' in params
if self.extraOptions:
params['Arguments'] += ' ' + self.extraOptions
params['ExtraOptions'] = self.extraOptions
self.log.verbose('Job request successful: \n', jobRequest['Value'])
self.log.info('Received JobID=%s, JobType=%s' % (jobID, jobType))
self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup))
self.jobCount += 1
try:
jobReport = JobReport(jobID, '[email protected]%s' % self.siteName)
jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)
if 'BOINC_JOB_ID' in os.environ:
# Report BOINC environment
for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
jobReport.setJobParameter(thisp, gConfig.getValue('/LocalSite/%s' % thisp, 'Unknown'), sendFlag=False)
jobReport.setJobStatus('Matched', 'Job Received by Agent')
result = self.__setupProxy(ownerDN, jobGroup)
if not result['OK']:
return self.__rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
proxyChain = result.get('Value')
# Save the job jdl for external monitoring
self.__saveJobJDLRequest(jobID, jobJDL)
software = self.__checkInstallSoftware(jobID, params, ceDict)
if not software['OK']:
self.log.error('Failed to install software for job', '%s' % (jobID))
errorMsg = software['Message']
if not errorMsg:
errorMsg = 'Failed software installation'
return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)
self.log.debug('Before %sCE submitJob()' % (self.ceName))
result = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain, processors, wholeNode)
if not result['OK']:
self.__report(jobID, 'Failed', result['Message'])
return self.__finish(result['Message'])
elif 'PayloadFailed' in result:
# Do not keep running and do not overwrite the Payload error
message = 'Payload execution failed with error code %s' % result['PayloadFailed']
if self.stopOnApplicationFailure:
return self.__finish(message, self.stopOnApplicationFailure)
else:
self.log.info(message)
self.log.debug('After %sCE submitJob()' % (self.ceName))
except Exception as subExcept: # pylint: disable=broad-except
self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True)
return self.__rescheduleFailedJob(jobID, 'Job processing failed with exception', self.stopOnApplicationFailure)
# Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])
result = self.timeLeftUtil.getTimeLeft(cpuTime, processors)
if result['OK']:
self.timeLeft = result['Value']
else:
if result['Message'] != 'Current batch system is not supported':
self.timeLeftError = result['Message']
else:
# if the batch system is not defined, use the process time and the CPU normalization defined locally
self.timeLeft = self.__getCPUTimeLeft()
return S_OK('Job Agent cycle complete')
示例8: execute
def execute ( arguments ):
    """
    Prepare a job for running: enter its working directory, build the
    JobWrapper, download the input sandbox and resolve the input data.

    The module-level gJobReport is (re)bound to a JobReport for this job.

    :param arguments: mapping with a 'Job' sub-mapping (must contain 'JobID';
                      'InputSandbox' and 'InputData' are optional) and an
                      optional 'WorkingDirectory' entry
    :return: 1 as soon as one of the steps fails (after the job has been
             handed to rescheduleFailedJob); None when every step shown here
             succeeded
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    os.environ['JOBID'] = jobID
    jobID = int( jobID )
    # Fix in the environment to get a reasonable performance from dCache,
    # until we move to a new version of root
    # os.environ['DCACHE_RAHEAD'] = str(1)
    # os.environ['DCACHE_RA_BUFFER'] = str(50*1024)

    if 'WorkingDirectory' in arguments:
        wdir = os.path.expandvars( arguments['WorkingDirectory'] )
        if os.path.isdir( wdir ):
            os.chdir( wdir )
        else:
            try:
                os.makedirs( wdir )
                if os.path.isdir( wdir ):
                    os.chdir( wdir )
            except Exception:
                gLogger.exception( 'JobWrapperTemplate could not create working directory' )
                rescheduleFailedJob( jobID, 'Could Not Create Working Directory' )
                return 1

    #root = arguments['CE']['Root']
    gJobReport = JobReport( jobID, 'JobWrapper' )

    try:
        job = JobWrapper( jobID, gJobReport )
        job.initialize( arguments )
    except Exception:
        gLogger.exception( 'JobWrapper failed the initialization phase' )
        rescheduleFailedJob( jobID, 'Job Wrapper Initialization', gJobReport )
        # 'job' is unbound when the JobWrapper constructor itself raised, so
        # the accounting call must not be allowed to escape this handler.
        try:
            job.sendWMSAccounting( 'Failed', 'Job Wrapper Initialization' )
        except Exception:
            gLogger.exception( 'JobWrapper failed sending job accounting' )
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox( arguments['Job']['InputSandbox'] )
            if not result['OK']:
                gLogger.warn( result['Message'] )
                raise JobWrapperError( result['Message'] )
        except Exception:
            gLogger.exception( 'JobWrapper failed to download input sandbox' )
            rescheduleFailedJob( jobID, 'Input Sandbox Download' )
            job.sendWMSAccounting( 'Failed', 'Input Sandbox Download' )
            return 1
    else:
        gLogger.verbose( 'Job has no InputSandbox requirement' )

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn( result['Message'] )
                    raise JobWrapperError( result['Message'] )
            except Exception:
                gLogger.exception( 'JobWrapper failed to resolve input data' )
                rescheduleFailedJob( jobID, 'Input Data Resolution' )
                job.sendWMSAccounting( 'Failed', 'Input Data Resolution' )
                return 1
        else:
            gLogger.verbose( 'Job has a null InputData requirement:' )
            gLogger.verbose( arguments )
示例9: JobReport
'''
Created on 2015-05-19 21:45:37
@author: suo

Report an application status for a job from the command line.

Positional arguments (read from sys.argv): jobID, experiment, message.
If the status cannot be set, the returned result is appended to 'job.err'
in the current directory.
'''
import sys
from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport
from DIRAC.Core.Base import Script

Script.parseCommandLine( ignoreErrors = False )

jobID = sys.argv[1]
experiment = sys.argv[2]
message = sys.argv[3]

# 'experiment' is passed as the second JobReport constructor argument.
jobReport = JobReport(jobID,experiment)
result = jobReport.setApplicationStatus(message)
if not result['OK']:
    try:
        with open('job.err','a') as errFile:
            # NOTE(review): the text says 'setJobStatus' although the call
            # above is setApplicationStatus; kept as-is in case the log is parsed.
            errFile.write('setJobStatus error: %s\n' % result)
    except IOError as e:
        # The exception must be bound here; the handler previously referenced
        # an undefined name and would itself have failed with NameError.
        print('IOError: %s' % str(e))
示例10: execute
def execute( arguments ):
global gJobReport
jobID = arguments['Job']['JobID']
os.environ['JOBID'] = jobID
jobID = int( jobID )
if arguments.has_key( 'WorkingDirectory' ):
wdir = os.path.expandvars( arguments['WorkingDirectory'] )
if os.path.isdir( wdir ):
os.chdir( wdir )
else:
try:
os.makedirs( wdir )
if os.path.isdir( wdir ):
os.chdir( wdir )
except Exception:
gLogger.exception( 'JobWrapperTemplate could not create working directory' )
rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory' )
return 1
gJobReport = JobReport( jobID, 'JobWrapper' )
try:
job = JobWrapper( jobID, gJobReport )
job.initialize( arguments )
except Exception as e:
gLogger.exception( 'JobWrapper failed the initialization phase', lException = e )
rescheduleResult = rescheduleFailedJob( jobID, 'Job Wrapper Initialization', gJobReport )
try:
job.sendJobAccounting( rescheduleResult, 'Job Wrapper Initialization' )
except Exception as e:
gLogger.exception( 'JobWrapper failed sending job accounting', lException = e )
return 1
if arguments['Job'].has_key( 'InputSandbox' ):
gJobReport.commit()
try:
result = job.transferInputSandbox( arguments['Job']['InputSandbox'] )
if not result['OK']:
gLogger.warn( result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception:
gLogger.exception( 'JobWrapper failed to download input sandbox' )
rescheduleResult = rescheduleFailedJob( jobID, 'Input Sandbox Download', gJobReport )
job.sendJobAccounting( rescheduleResult, 'Input Sandbox Download' )
return 1
else:
gLogger.verbose( 'Job has no InputSandbox requirement' )
gJobReport.commit()
if arguments['Job'].has_key( 'InputData' ):
if arguments['Job']['InputData']:
try:
result = job.resolveInputData()
if not result['OK']:
gLogger.warn( result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception as x:
gLogger.exception( 'JobWrapper failed to resolve input data' )
rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport )
job.sendJobAccounting( rescheduleResult, 'Input Data Resolution' )
return 1
else:
gLogger.verbose( 'Job has a null InputData requirement:' )
gLogger.verbose( arguments )
else:
gLogger.verbose( 'Job has no InputData requirement' )
gJobReport.commit()
try:
result = job.execute( arguments )
if not result['OK']:
gLogger.error( 'Failed to execute job', result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception as x:
if str( x ) == '0':
gLogger.verbose( 'JobWrapper exited with status=0 after execution' )
else:
gLogger.exception( 'Job failed in execution phase' )
gJobReport.setJobParameter( 'Error Message', str( x ), sendFlag = False )
gJobReport.setJobStatus( 'Failed', 'Exception During Execution', sendFlag = False )
job.sendFailoverRequest( 'Failed', 'Exception During Execution' )
return 1
if arguments['Job'].has_key( 'OutputSandbox' ) or arguments['Job'].has_key( 'OutputData' ):
try:
result = job.processJobOutputs( arguments )
if not result['OK']:
gLogger.warn( result['Message'] )
raise JobWrapperError( result['Message'] )
except Exception as x:
gLogger.exception( 'JobWrapper failed to process output files' )
gJobReport.setJobParameter( 'Error Message', str( x ), sendFlag = False )
gJobReport.setJobStatus( 'Failed', 'Uploading Job Outputs', sendFlag = False )
job.sendFailoverRequest( 'Failed', 'Uploading Job Outputs' )
return 2
#.........這裏部分代碼省略.........
示例11: execute
#.........這裏部分代碼省略.........
jobType = "Unknown"
else:
jobType = params["JobType"]
if not params.has_key("SystemConfig"):
self.log.warn("Job has no system configuration defined in JDL parameters")
systemConfig = gConfig.getValue("/LocalSite/Architecture", "")
self.log.info(
"Setting system config to /LocalSite/Architecture = %s since it was not specified" % systemConfig
)
if not systemConfig:
self.log.warn("/LocalSite/Architecture is not defined")
params["SystemConfig"] = systemConfig
else:
systemConfig = params["SystemConfig"]
if systemConfig.lower() == "any":
systemConfig = gConfig.getValue("/LocalSite/Architecture", "")
self.log.info(
"Setting SystemConfig = /LocalSite/Architecture =",
'"%s" since it was set to "ANY" in the job description' % systemConfig,
)
if not systemConfig:
self.log.warn("/LocalSite/Architecture is not defined")
params["SystemConfig"] = systemConfig
if not params.has_key("CPUTime"):
self.log.warn("Job has no CPU requirement defined in JDL parameters")
self.log.verbose("Job request successful: \n %s" % (jobRequest["Value"]))
self.log.info("Received JobID=%s, JobType=%s, SystemConfig=%s" % (jobID, jobType, systemConfig))
self.log.info("OwnerDN: %s JobGroup: %s" % (ownerDN, jobGroup))
self.jobCount += 1
try:
jobReport = JobReport(jobID, "[email protected]%s" % self.siteName)
jobReport.setJobParameter("MatcherServiceTime", str(matchTime), sendFlag=False)
if self.gridCEQueue:
jobReport.setJobParameter("GridCEQueue", self.gridCEQueue, sendFlag=False)
if os.environ.has_key("BOINC_JOB_ID"):
# Report BOINC environment
for p in ["BoincUserID", "BoincHostID", "BoincHostPlatform", "BoincHostName"]:
jobReport.setJobParameter(p, gConfig.getValue("/LocalSite/%s" % p, "Unknown"), sendFlag=False)
jobReport.setJobStatus("Matched", "Job Received by Agent")
# self.__setJobSite( jobID, self.siteName )
if not self.pilotInfoReportedFlag:
self.__reportPilotInfo(jobID)
result = self.__setupProxy(ownerDN, jobGroup)
if not result["OK"]:
return self.__rescheduleFailedJob(jobID, result["Message"], params, self.stopOnApplicationFailure)
if "Value" in result and result["Value"]:
proxyChain = result["Value"]
software = self.__checkInstallSoftware(jobID, params, ceDict)
if not software["OK"]:
self.log.error("Failed to install software for job %s" % (jobID))
errorMsg = software["Message"]
if not errorMsg:
errorMsg = "Failed software installation"
return self.__rescheduleFailedJob(jobID, errorMsg, params, self.stopOnApplicationFailure)
self.log.verbose("Before %sCE submitJob()" % (self.ceName))
submission = self.__submitJob(jobID, params, ceDict, optimizerParams, jobJDL, proxyChain)
if not submission["OK"]:
self.__report(jobID, "Failed", submission["Message"])
return self.__finish(submission["Message"])
示例12: execute
def execute(self):
""" Main execution function.
"""
self.log.info('Initializing %s' % self.version)
result = self.resolveInputVariables()
if not result['OK']:
self.log.error(result['Message'])
return result
if not self.fileReport:
self.fileReport = FileReport('Transformation/TransformationManager')
if self.InputData:
inputFiles = self.fileReport.getFiles()
for lfn in self.InputData:
if not lfn in inputFiles:
self.log.verbose('No status populated for input data %s, setting to "Unused"' % lfn)
result = self.fileReport.setFileStatus(int(self.productionID), lfn, 'Unused')
if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
self.log.info('Workflow status = %s, step status = %s' %(self.workflowStatus['OK'], self.stepStatus['OK']))
inputFiles = self.fileReport.getFiles()
for lfn in inputFiles:
if inputFiles[lfn] != 'ApplicationCrash':
self.log.info('Forcing status to "Unused" due to workflow failure for: %s' % (lfn))
self.fileReport.setFileStatus(int(self.productionID), lfn, 'Unused')
else:
inputFiles = self.fileReport.getFiles()
if inputFiles:
self.log.info('Workflow status OK, setting input file status to Processed')
for lfn in inputFiles:
self.log.info('Setting status to "Processed" for: %s' % (lfn))
self.fileReport.setFileStatus(int(self.productionID), lfn, 'Processed')
result = self.fileReport.commit()
if not result['OK']:
self.log.error('Failed to report file status to ProductionDB, request will be generated', result['Message'])
else:
self.log.info('Status of files have been properly updated in the ProcessingDB')
# Must ensure that the local job report instance is used to report the final status
# in case of failure and a subsequent failover operation
if self.workflowStatus['OK'] and self.stepStatus['OK']:
if not self.jobReport:
self.jobReport = JobReport(int(self.jobID))
jobStatus = self.jobReport.setApplicationStatus('Job Finished Successfully')
if not jobStatus['OK']:
self.log.warn(jobStatus['Message'])
# Retrieve the accumulated reporting request
reportRequest = None
if self.jobReport:
result = self.jobReport.generateRequest()
if not result['OK']:
self.log.warn('Could not generate request for job report with result:\n%s' % (result))
else:
reportRequest = result['Value']
if reportRequest:
self.log.info('Populating request with job report information')
self.request.update(reportRequest)
fileReportRequest = None
if self.fileReport:
result = self.fileReport.generateRequest()
if not result['OK']:
self.log.warn('Could not generate request for file report with result:\n%s' % (result))
else:
fileReportRequest = result['Value']
if fileReportRequest:
self.log.info('Populating request with file report information')
result = self.request.update(fileReportRequest)
accountingReport = None
if self.workflow_commons.has_key('AccountingReport'):
accountingReport = self.workflow_commons['AccountingReport']
if accountingReport:
result = accountingReport.commit()
if not result['OK']:
self.log.info('Populating request with accounting report information')
self.request.setDISETRequest(result['rpcStub'])
if self.request.isEmpty()['Value']:
self.log.info('Request is empty, nothing to do.')
return self.finalize()
request_string = self.request.toXML()['Value']
self.log.debug(request_string)
# Write out the request string
fname = '%s_%s_request.xml' % (self.productionID, self.prodJobID)
xmlfile = open(fname, 'w')
xmlfile.write(request_string)
xmlfile.close()
self.log.info('Creating failover request for deferred operations for job %s:' % self.jobID)
result = self.request.getDigest()
if result['OK']:
digest = result['Value']
self.log.info(digest)
if not self.enable:
self.log.info('Module is disabled by control flag')
#.........這裏部分代碼省略.........
示例13: FailoverRequest
class FailoverRequest(ModuleBase):
""" Handle the failover requests issued by previous steps. Used in production.
"""
#############################################################################
def __init__(self):
    """Create the module with default, not-yet-resolved state.

    The attributes set here are placeholders; applicationSpecificInputs()
    overwrites them from the environment and the workflow/step dictionaries.
    """
    super(FailoverRequest, self).__init__()
    self.version = __RCSID__
    self.log = gLogger.getSubLogger( "FailoverRequest" )

    # --- Internal parameters ---
    # Control flag: switched off when no WMS job ID is found or when the
    # step supplies an 'Enable' value that is not a boolean.
    self.enable = True
    # WMS job identifier, read from the JOBID environment variable.
    self.jobID = ''
    # Taken from workflow_commons 'PRODUCTION_ID' and 'JOB_ID' respectively.
    self.productionID = self.prodJobID = None

    # --- Workflow parameters ---
    # Picked up from workflow_commons ('JobReport', 'FileReport', 'Request')
    # when present there.
    self.jobReport = self.fileReport = self.request = None
#############################################################################
def applicationSpecificInputs(self):
    """Resolve the module input parameters.

    Reads the WMS job ID from the ``JOBID`` environment variable, the
    ``Enable`` flag from ``step_commons``, and the report/request objects and
    production identifiers from ``workflow_commons``. ``self.InputData`` is
    normalised to a list of LFNs with any ``LFN:`` prefix removed.

    Membership tests use the ``in`` operator instead of ``dict.has_key()``,
    which no longer exists in Python 3, and type checks use ``isinstance``.

    :return: ``S_OK('Parameters resolved')``
    """
    self.log.debug(self.workflow_commons)
    self.log.debug(self.step_commons)

    if 'JOBID' in os.environ:
        self.jobID = os.environ['JOBID']
        self.log.verbose('Found WMS JobID = %s' % self.jobID)
    else:
        self.log.info('No WMS JobID found, disabling module via control flag')
        self.enable = False

    # NOTE(review): an explicit boolean 'Enable' in step_commons overrides the
    # JOBID-based decision above (original behaviour, kept as-is) - confirm
    # this is intended.
    if 'Enable' in self.step_commons:
        self.enable = self.step_commons['Enable']
        if not isinstance(self.enable, bool):
            self.log.warn('Enable flag set to non-boolean value %s, setting to False' % self.enable)
            self.enable = False

    # Earlier modules will have populated the report objects
    if 'JobReport' in self.workflow_commons:
        self.jobReport = self.workflow_commons['JobReport']

    if 'FileReport' in self.workflow_commons:
        self.fileReport = self.workflow_commons['FileReport']

    if self.InputData:
        # A non-list value is treated as a ';'-separated string of entries.
        if not isinstance(self.InputData, list):
            self.InputData = self.InputData.split(';')
        self.InputData = [x.replace('LFN:', '') for x in self.InputData]

    if 'Request' in self.workflow_commons:
        self.request = self.workflow_commons['Request']

    # No (or an empty/falsy) request was handed over: start a fresh one
    # labelled with this job's ID.
    if not self.request:
        self.request = RequestContainer()
        self.request.setRequestName('job_%s_request.xml' % self.jobID)
        self.request.setJobID(self.jobID)
        self.request.setSourceComponent("Job_%s" % self.jobID)

    if 'PRODUCTION_ID' in self.workflow_commons:
        self.productionID = self.workflow_commons['PRODUCTION_ID']

    if 'JOB_ID' in self.workflow_commons:
        self.prodJobID = self.workflow_commons['JOB_ID']

    return S_OK('Parameters resolved')
#############################################################################
def execute(self):
""" Main execution function.
"""
self.log.info('Initializing %s' % self.version)
result = self.resolveInputVariables()
if not result['OK']:
self.log.error(result['Message'])
return result
if not self.fileReport:
self.fileReport = FileReport('Transformation/TransformationManager')
if self.InputData:
inputFiles = self.fileReport.getFiles()
for lfn in self.InputData:
if not lfn in inputFiles:
self.log.verbose('No status populated for input data %s, setting to "Unused"' % lfn)
result = self.fileReport.setFileStatus(int(self.productionID), lfn, 'Unused')
if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
self.log.info('Workflow status = %s, step status = %s' %(self.workflowStatus['OK'], self.stepStatus['OK']))
inputFiles = self.fileReport.getFiles()
for lfn in inputFiles:
if inputFiles[lfn] != 'ApplicationCrash':
self.log.info('Forcing status to "Unused" due to workflow failure for: %s' % (lfn))
self.fileReport.setFileStatus(int(self.productionID), lfn, 'Unused')
else:
inputFiles = self.fileReport.getFiles()
if inputFiles:
self.log.info('Workflow status OK, setting input file status to Processed')
#.........這裏部分代碼省略.........
示例14: execute
#.........這裏部分代碼省略.........
for key in matcherInfo:
if key not in matcherParams:
optimizerParams[key] = matcherInfo[key]
parameters = self.__getJDLParameters(jobJDL)
if not parameters["OK"]:
self.__report(jobID, "Failed", "Could Not Extract JDL Parameters")
self.log.warn(parameters["Message"])
return self.__finish("JDL Problem")
params = parameters["Value"]
if "JobID" not in params:
msg = "Job has not JobID defined in JDL parameters"
self.__report(jobID, "Failed", msg)
self.log.warn(msg)
return self.__finish("JDL Problem")
else:
jobID = params["JobID"]
if "JobType" not in params:
self.log.warn("Job has no JobType defined in JDL parameters")
jobType = "Unknown"
else:
jobType = params["JobType"]
if "CPUTime" not in params:
self.log.warn("Job has no CPU requirement defined in JDL parameters")
if self.extraOptions:
params["Arguments"] += " " + self.extraOptions
params["ExtraOptions"] = self.extraOptions
self.log.verbose("Job request successful: \n", jobRequest["Value"])
self.log.info("Received JobID=%s, JobType=%s" % (jobID, jobType))
self.log.info("OwnerDN: %s JobGroup: %s" % (ownerDN, jobGroup))
self.jobCount += 1
try:
jobReport = JobReport(jobID, "[email protected]%s" % self.siteName)
jobReport.setJobParameter("MatcherServiceTime", str(matchTime), sendFlag=False)
if "BOINC_JOB_ID" in os.environ:
# Report BOINC environment
for p in ("BoincUserID", "BoincHostID", "BoincHostPlatform", "BoincHostName"):
jobReport.setJobParameter(p, gConfig.getValue("/LocalSite/%s" % p, "Unknown"), sendFlag=False)
jobReport.setJobStatus("Matched", "Job Received by Agent")
result = self.__setupProxy(ownerDN, jobGroup)
if not result["OK"]:
return self.__rescheduleFailedJob(jobID, result["Message"], self.stopOnApplicationFailure)
proxyChain = result.get("Value")
# Save the job jdl for external monitoring
self.__saveJobJDLRequest(jobID, jobJDL)
software = self.__checkInstallSoftware(jobID, params, ceDict)
if not software["OK"]:
self.log.error("Failed to install software for job", "%s" % (jobID))
errorMsg = software["Message"]
if not errorMsg:
errorMsg = "Failed software installation"
return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)
self.log.debug("Before %sCE submitJob()" % (self.ceName))
submission = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain)
if not submission["OK"]:
self.__report(jobID, "Failed", submission["Message"])
return self.__finish(submission["Message"])
elif "PayloadFailed" in submission:
# Do not keep running and do not overwrite the Payload error
message = "Payload execution failed with error code %s" % submission["PayloadFailed"]
if self.stopOnApplicationFailure:
return self.__finish(message, self.stopOnApplicationFailure)
else:
self.log.info(message)
self.log.debug("After %sCE submitJob()" % (self.ceName))
except Exception:
self.log.exception()
return self.__rescheduleFailedJob(
jobID, "Job processing failed with exception", self.stopOnApplicationFailure
)
# Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])
result = self.timeLeftUtil.getTimeLeft(cpuTime)
if result["OK"]:
self.timeLeft = result["Value"]
else:
if result["Message"] != "Current batch system is not supported":
self.timeLeftError = result["Message"]
else:
# if the batch system is not defined, use the process time and the CPU normalization defined locally
self.timeLeft = self.__getCPUTimeLeft()
scaledCPUTime = self.timeLeftUtil.getScaledCPU()
self.__setJobParam(jobID, "ScaledCPUTime", str(scaledCPUTime - self.scaledCPUTime))
self.scaledCPUTime = scaledCPUTime
return S_OK("Job Agent cycle complete")