本文整理汇总了 Python 中 pandajedi.jedicore.MsgWrapper.MsgWrapper.sendMsg 方法的典型用法代码示例。如果您正苦于以下问题:Python MsgWrapper.sendMsg 方法的具体用法?Python MsgWrapper.sendMsg 怎么用?Python MsgWrapper.sendMsg 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pandajedi.jedicore.MsgWrapper.MsgWrapper 的用法示例。
在下文中一共展示了 MsgWrapper.sendMsg 方法的 9 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: toBeThrottled
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
def toBeThrottled(self,vo,prodSourceLabel,cloudName,workQueue,jobStat):
# params
nBunch = 4
threshold = 2.0
thresholdForSite = threshold - 1.0
nJobsInBunchMax = 500
nJobsInBunchMin = 300
nJobsInBunchMaxES = 1000
nWaitingLimit = 4
nWaitingBunchLimit = 2
nParallel = 8
# make logger
tmpLog = MsgWrapper(logger)
workQueueIDs = workQueue.getIDs()
msgHeader = '{0}:{1} cloud={2} queue={3}:'.format(vo,prodSourceLabel,cloudName,workQueue.queue_name)
tmpLog.debug(msgHeader+' start workQueueID={0}'.format(str(workQueueIDs)))
# check cloud status
if not self.siteMapper.checkCloud(cloudName):
msgBody = "SKIP cloud={0} undefined".format(cloudName)
tmpLog.debug(msgHeader+" "+msgBody)
tmpLog.sendMsg(msgHeader+' '+msgBody,self.msgType,msgLevel='warning')
return self.retThrottled
cloudSpec = self.siteMapper.getCloud(cloudName)
if cloudSpec['status'] in ['offline']:
msgBody = "SKIP cloud.status={0}".format(cloudSpec['status'])
tmpLog.debug(msgHeader+" "+msgBody)
tmpLog.sendMsg(msgHeader+' '+msgBody,self.msgType,msgLevel='warning')
return self.retThrottled
if cloudSpec['status'] in ['test']:
if workQueue.queue_name != 'test':
msgBody = "SKIP cloud.status={0} for non test queue ({1})".format(cloudSpec['status'],
workQueue.queue_name)
tmpLog.sendMsg(msgHeader+' '+msgBody,self.msgType,msgLevel='warning')
tmpLog.debug(msgHeader+" "+msgBody)
return self.retThrottled
# check if unthrottled
if workQueue.queue_share == None:
msgBody = "PASS unthrottled since share=None"
tmpLog.debug(msgHeader+" "+msgBody)
return self.retUnThrottled
# count number of jobs in each status
nRunning = 0
nNotRun = 0
nDefine = 0
nWaiting = 0
for workQueueID in workQueueIDs:
if jobStat.has_key(cloudName) and \
jobStat[cloudName].has_key(workQueueID):
tmpLog.debug(msgHeader+" "+str(jobStat[cloudName][workQueueID]))
for pState,pNumber in jobStat[cloudName][workQueueID].iteritems():
if pState in ['running']:
nRunning += pNumber
elif pState in ['assigned','activated','starting']:
nNotRun += pNumber
elif pState in ['defined']:
nDefine += pNumber
elif pState in ['waiting']:
nWaiting += pNumber
# check if higher prio tasks are waiting
tmpStat,highestPrioJobStat = self.taskBufferIF.getHighestPrioJobStat_JEDI('managed',cloudName,workQueue)
highestPrioInPandaDB = highestPrioJobStat['highestPrio']
nNotRunHighestPrio = highestPrioJobStat['nNotRun']
# the highest priority of waiting tasks
highestPrioWaiting = self.taskBufferIF.checkWaitingTaskPrio_JEDI(vo,workQueue,
'managed',cloudName)
if highestPrioWaiting == None:
msgBody = 'failed to get the highest priority of waiting tasks'
tmpLog.error(msgHeader+" "+msgBody)
return self.retTmpError
# high priority tasks are waiting
highPrioQueued = False
if highestPrioWaiting > highestPrioInPandaDB or (highestPrioWaiting == highestPrioInPandaDB and \
nNotRunHighestPrio < nJobsInBunchMin):
highPrioQueued = True
tmpLog.debug(msgHeader+" highestPrio waiting:{0} inPanda:{1} numNotRun:{2} -> highPrioQueued={3}".format(highestPrioWaiting,
highestPrioInPandaDB,
nNotRunHighestPrio,
highPrioQueued))
# set maximum number of jobs to be submitted
tmpRemainingSlot = int(nRunning*threshold-nNotRun)
if tmpRemainingSlot < nJobsInBunchMin:
# use the lower limit to avoid creating too many _sub/_dis datasets
nJobsInBunch = nJobsInBunchMin
else:
if workQueue.queue_name in ['evgensimul']:
# use higher limit for evgensimul
if tmpRemainingSlot < nJobsInBunchMaxES:
nJobsInBunch = tmpRemainingSlot
else:
nJobsInBunch = nJobsInBunchMaxES
else:
if tmpRemainingSlot < nJobsInBunchMax:
nJobsInBunch = tmpRemainingSlot
else:
nJobsInBunch = nJobsInBunchMax
nQueueLimit = nJobsInBunch*nBunch
# use special limit for CERN
if cloudName == 'CERN':
nQueueLimit = 2000
# use nPrestage for reprocessing
#.........这里部分代码省略.........
示例2: runImpl
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
#.........这里部分代码省略.........
commandStr = commandMap['command']
commentStr = commandMap['comment']
oldStatus = commandMap['oldStatus']
tmpLog.info('start for {0}'.format(commandStr))
tmpStat = Interaction.SC_SUCCEEDED
if commandStr in ['kill','finish','reassign']:
# get active PandaIDs to be killed
pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID,True)
if pandaIDs == None:
tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
tmpStat = Interaction.SC_FAILED
# kill jobs or update task
if tmpStat == Interaction.SC_SUCCEEDED:
if pandaIDs == []:
# done since no active jobs
tmpLog.info('completed the command')
tmpTaskSpec = JediTaskSpec()
tmpTaskSpec.jediTaskID = jediTaskID
updateTaskStatus = True
if commandStr != 'reassign':
# keep oldStatus for task reassignment since it is reset when actually reassigned
tmpTaskSpec.forceUpdate('oldStatus')
else:
# extract cloud or site
tmpItems = commentStr.split(':')
if tmpItems[0] == 'cloud':
tmpTaskSpec.cloud = tmpItems[1]
else:
tmpTaskSpec.site = tmpItems[1]
# back to oldStatus if necessary
if tmpItems[2] == 'y':
tmpTaskSpec.status = oldStatus
tmpTaskSpec.forceUpdate('oldStatus')
updateTaskStatus = False
if updateTaskStatus:
tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':jediTaskID})
else:
tmpLog.info('sending kill command')
tmpRet = self.taskBufferIF.killJobs(pandaIDs,commentStr,'50',True)
tmpLog.info('done with {0}'.format(str(tmpRet)))
elif commandStr in ['retry','incexec']:
# change task params for incexec
if commandStr == 'incexec':
try:
# read task params
taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
taskParamMap = RefinerUtils.decodeJSON(taskParam)
# remove some params
for newKey in ['nFiles','fixedSandbox']:
try:
del taskParamMap[newKey]
except:
pass
# convert new params
newParamMap = RefinerUtils.decodeJSON(commentStr)
# change params
for newKey,newVal in newParamMap.iteritems():
if newVal == None:
# delete
if newKey in taskParamMap:
del taskParamMap[newKey]
else:
# change
taskParamMap[newKey] = newVal
# overwrite sandbox
if 'fixedSandbox' in taskParamMap:
# noBuild
# Rewrite the "-a <archive>" constant job parameter so that it points at the
# sandbox named by taskParamMap['fixedSandbox'] (noBuild case).
for tmpParam in taskParamMap['jobParameters']:
    # only a constant parameter whose whole value is "-a <name>" is rewritten
    if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$',tmpParam['value']) is not None:
        # the original read '-a {0}'.taskParamMap[...], an attribute access on
        # str that raises AttributeError; .format() is what was intended
        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
# build
if taskParamMap.has_key('buildSpec'):
taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
# merge
if taskParamMap.has_key('mergeSpec'):
taskParamMap['mergeSpec']['jobParameters'] = \
re.sub('-a [^ ]+','-a {0}'.format(taskParamMap['fixedSandbox']),taskParamMap['mergeSpec']['jobParameters'])
# encode new param
strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID,strTaskParams)
if tmpRet != True:
tmpLog.error('failed to update task params')
continue
except:
errtype,errvalue = sys.exc_info()[:2]
tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__,errvalue))
continue
# retry failed files
tmpRet,newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID,commandStr)
if tmpRet == True:
tmpMsg = 'set task.status={0}'.format(newTaskStatus)
tmpLog.sendMsg(tmpMsg,self.msgType)
tmpLog.info(tmpMsg)
tmpLog.info('done with {0}'.format(tmpRet))
else:
tmpLog.error('unknown command')
except:
errtype,errvalue = sys.exc_info()[:2]
logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
示例3: runImpl
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
#.........这里部分代码省略.........
respectLB,
tgtNumEventsPerJob,
skipFilesUsedBy,
ramCount,
taskSpec,
skipShortInput)
if retDB == False:
taskSpec.setErrDiag('failed to insert files for {0}. {1}'.format(datasetSpec.datasetName,
diagMap['errMsg']))
allUpdated = False
taskBroken = True
break
elif retDB == None:
# the dataset is locked by another or status is not applicable
allUpdated = False
tmpLog.debug('escape since task or dataset is locked')
break
elif missingFileList != []:
# files are missing
tmpErrStr = '{0} files missing in {1}'.format(len(missingFileList),datasetSpec.datasetName)
tmpLog.debug(tmpErrStr)
taskSpec.setErrDiag(tmpErrStr)
allUpdated = False
taskOnHold = True
missingMap[datasetSpec.datasetName] = {'datasetSpec':datasetSpec,
'missingFiles':missingFileList}
else:
# reduce the number of files to be read
if taskParamMap.has_key('nFiles'):
if datasetSpec.isMaster():
taskParamMap['nFiles'] -= nFilesUnique
# reduce the number of files for scout
if useScout:
nChunksForScout = diagMap['nChunksForScout']
# number of master input files
if datasetSpec.isMaster():
checkedMaster = True
nFilesMaster += nFilesUnique
# running task
if diagMap['isRunningTask']:
runningTask = True
# no activated pending input for noWait
if noWaitParent and diagMap['nActivatedPending'] == 0 and not (useScout and nChunksForScout <= 0) \
and tmpMetadata['state'] != 'closed' and datasetSpec.isMaster():
tmpErrStr = 'insufficient inputs are ready. '
tmpErrStr += diagMap['errMsg']
tmpLog.debug(tmpErrStr)
taskSpec.setErrDiag(tmpErrStr)
taskOnHold = True
setFrozenTime = False
break
tmpLog.debug('end loop')
# no master input
if not taskOnHold and not taskBroken and allUpdated and nFilesMaster == 0 and checkedMaster:
tmpErrStr = 'no master input files. input dataset is empty'
tmpLog.error(tmpErrStr)
taskSpec.setErrDiag(tmpErrStr,None)
if taskSpec.allowEmptyInput() or noWaitParent:
taskOnHold = True
else:
taskBroken = True
# index consistency
if not taskOnHold and not taskBroken and len(datasetsIdxConsistency) > 0:
self.taskBufferIF.removeFilesIndexInconsistent_JEDI(jediTaskID,datasetsIdxConsistency)
# update task status
if taskBroken:
# task is broken
taskSpec.status = 'tobroken'
tmpMsg = 'set task_status={0}'.format(taskSpec.status)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
allRet = self.taskBufferIF.updateTaskStatusByContFeeder_JEDI(jediTaskID,taskSpec,pid=self.pid)
# change task status unless the task is running
if not runningTask:
if taskOnHold:
# go to pending state
if not taskSpec.status in ['broken','tobroken']:
taskSpec.setOnHold()
tmpMsg = 'set task_status={0}'.format(taskSpec.status)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
allRet = self.taskBufferIF.updateTaskStatusByContFeeder_JEDI(jediTaskID,taskSpec,pid=self.pid,setFrozenTime=setFrozenTime)
elif allUpdated:
# all OK
allRet,newTaskStatus = self.taskBufferIF.updateTaskStatusByContFeeder_JEDI(jediTaskID,getTaskStatus=True,pid=self.pid,
useWorldCloud=taskSpec.useWorldCloud())
tmpMsg = 'set task_status={0}'.format(newTaskStatus)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
# just unlock
retUnlock = self.taskBufferIF.unlockSingleTask_JEDI(jediTaskID,self.pid)
tmpLog.debug('unlock not-running task with {0}'.format(retUnlock))
else:
# just unlock
retUnlock = self.taskBufferIF.unlockSingleTask_JEDI(jediTaskID,self.pid)
tmpLog.debug('unlock task with {0}'.format(retUnlock))
tmpLog.debug('done')
except:
errtype,errvalue = sys.exc_info()[:2]
logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
示例4: runImpl
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
def runImpl(self):
while True:
try:
# get a part of list
nTasks = 10
taskList = self.taskList.get(nTasks)
# no more datasets
if len(taskList) == 0:
self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
return
# loop over all tasks
for jediTaskID,commandMap in taskList:
# make logger
tmpLog = MsgWrapper(self.logger,' < jediTaskID={0} >'.format(jediTaskID))
commandStr = commandMap['command']
commentStr = commandMap['comment']
oldStatus = commandMap['oldStatus']
tmpLog.info('start for {0}'.format(commandStr))
tmpStat = Interaction.SC_SUCCEEDED
if commandStr in ['kill','finish','reassign']:
tmpMsg = 'executing {0}'.format(commandStr)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
# loop twice to see immediate result
for iLoop in range(2):
# get active PandaIDs to be killed
if commandStr == 'reassign' and commentStr != None and 'soft reassign' in commentStr:
pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
elif commandStr == 'reassign' and commentStr != None and 'nokill reassign' in commentStr:
pandaIDs = []
else:
pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID,True)
if pandaIDs == None:
tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
tmpStat = Interaction.SC_FAILED
# kill jobs or update task
if tmpStat == Interaction.SC_SUCCEEDED:
if pandaIDs == []:
# done since no active jobs
tmpMsg = 'completed cleaning jobs'
tmpLog.sendMsg(tmpMsg,self.msgType)
tmpLog.info(tmpMsg)
tmpTaskSpec = JediTaskSpec()
tmpTaskSpec.jediTaskID = jediTaskID
updateTaskStatus = True
if commandStr != 'reassign':
# reset oldStatus
# keep oldStatus for task reassignment since it is reset when actually reassigned
tmpTaskSpec.forceUpdate('oldStatus')
else:
# extract cloud or site
if commentStr != None:
tmpItems = commentStr.split(':')
if tmpItems[0] == 'cloud':
tmpTaskSpec.cloud = tmpItems[1]
elif tmpItems[0] == 'nucleus':
tmpTaskSpec.nucleus = tmpItems[1]
else:
tmpTaskSpec.site = tmpItems[1]
tmpMsg = 'set {0}={1}'.format(tmpItems[0],tmpItems[1])
tmpLog.sendMsg(tmpMsg,self.msgType)
tmpLog.info(tmpMsg)
# back to oldStatus if necessary
if tmpItems[2] == 'y':
tmpTaskSpec.status = oldStatus
tmpTaskSpec.forceUpdate('oldStatus')
updateTaskStatus = False
if commandStr == 'reassign':
tmpTaskSpec.forceUpdate('errorDialog')
if commandStr == 'finish':
# update datasets
tmpLog.info('updating datasets to finish')
tmpStat = self.taskBufferIF.updateDatasetsToFinishTask_JEDI(jediTaskID, self.pid)
if not tmpStat:
tmpLog.info('wait until datasets are updated to finish')
# ignore failGoalUnreached when manually finished
tmpStat,taskSpec = self.taskBufferIF.getTaskWithID_JEDI(jediTaskID)
tmpTaskSpec.splitRule = taskSpec.splitRule
tmpTaskSpec.unsetFailGoalUnreached()
if updateTaskStatus:
tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
tmpMsg = 'set task_status={0}'.format(tmpTaskSpec.status)
tmpLog.sendMsg(tmpMsg,self.msgType)
tmpLog.info(tmpMsg)
tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':jediTaskID},
setOldModTime=True)
tmpLog.info('done with {0}'.format(str(tmpRet)))
break
else:
# kill only in the first loop
if iLoop > 0:
break
# wait or kill jobs
if 'soft finish' in commentStr:
queuedPandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
tmpMsg = "trying to kill {0} queued jobs for soft finish".format(len(queuedPandaIDs))
tmpLog.info(tmpMsg)
tmpRet = self.taskBufferIF.killJobs(queuedPandaIDs,commentStr,'52',True)
tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
tmpLog.info(tmpMsg)
#.........这里部分代码省略.........
示例5: toBeThrottled
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
def toBeThrottled(self,vo,prodSourceLabel,cloudName,workQueue,jobStat):
# component name
compName = 'prod_job_throttler'
# params
nBunch = 4
threshold = 2.0
thresholdForSite = threshold - 1.0
nJobsInBunchMax = 600
nJobsInBunchMin = 500
nJobsInBunchMaxES = 1000
if workQueue.criteria != None and 'site' in workQueue.criteria:
minTotalWalltime = 10*1000*1000
else:
minTotalWalltime = 50*1000*1000
nWaitingLimit = 4
nWaitingBunchLimit = 2
nParallel = 2
# make logger
tmpLog = MsgWrapper(logger)
workQueueIDs = workQueue.getIDs()
msgHeader = '{0}:{1} cloud={2} queue={3}:'.format(vo,prodSourceLabel,cloudName,workQueue.queue_name)
tmpLog.debug(msgHeader+' start workQueueID={0}'.format(str(workQueueIDs)))
# change threshold
if workQueue.queue_name in ['mcore']:
threshold = 5.0
# check cloud status
if not self.siteMapper.checkCloud(cloudName):
msgBody = "SKIP cloud={0} undefined".format(cloudName)
tmpLog.warning(msgHeader+" "+msgBody)
tmpLog.sendMsg(msgHeader+' '+msgBody,self.msgType,msgLevel='warning')
return self.retThrottled
cloudSpec = self.siteMapper.getCloud(cloudName)
if cloudSpec['status'] in ['offline']:
msgBody = "SKIP cloud.status={0}".format(cloudSpec['status'])
tmpLog.warning(msgHeader+" "+msgBody)
tmpLog.sendMsg(msgHeader+' '+msgBody,self.msgType,msgLevel='warning')
return self.retThrottled
if cloudSpec['status'] in ['test']:
if workQueue.queue_name != 'test':
msgBody = "SKIP cloud.status={0} for non test queue ({1})".format(cloudSpec['status'],
workQueue.queue_name)
tmpLog.sendMsg(msgHeader+' '+msgBody,self.msgType,msgLevel='warning')
tmpLog.warning(msgHeader+" "+msgBody)
return self.retThrottled
# check if unthrottled
if workQueue.queue_share == None:
msgBody = "PASS unthrottled since share=None"
tmpLog.debug(msgHeader+" "+msgBody)
return self.retUnThrottled
# count number of jobs in each status
nRunning = 0
nNotRun = 0
nDefine = 0
nWaiting = 0
for workQueueID in workQueueIDs:
if jobStat.has_key(cloudName) and \
jobStat[cloudName].has_key(workQueueID):
tmpLog.debug(msgHeader+" "+str(jobStat[cloudName][workQueueID]))
for pState,pNumber in jobStat[cloudName][workQueueID].iteritems():
if pState in ['running']:
nRunning += pNumber
elif pState in ['assigned','activated','starting']:
nNotRun += pNumber
elif pState in ['defined']:
nDefine += pNumber
elif pState in ['waiting']:
nWaiting += pNumber
# check if higher prio tasks are waiting
tmpStat,highestPrioJobStat = self.taskBufferIF.getHighestPrioJobStat_JEDI('managed',cloudName,workQueue)
highestPrioInPandaDB = highestPrioJobStat['highestPrio']
nNotRunHighestPrio = highestPrioJobStat['nNotRun']
# the highest priority of waiting tasks
highestPrioWaiting = self.taskBufferIF.checkWaitingTaskPrio_JEDI(vo,workQueue,
'managed',cloudName)
if highestPrioWaiting == None:
msgBody = 'failed to get the highest priority of waiting tasks'
tmpLog.error(msgHeader+" "+msgBody)
return self.retTmpError
# high priority tasks are waiting
highPrioQueued = False
if highestPrioWaiting > highestPrioInPandaDB or (highestPrioWaiting == highestPrioInPandaDB and \
nNotRunHighestPrio < nJobsInBunchMin):
highPrioQueued = True
tmpLog.debug(msgHeader+" highestPrio waiting:{0} inPanda:{1} numNotRun:{2} -> highPrioQueued={3}".format(highestPrioWaiting,
highestPrioInPandaDB,
nNotRunHighestPrio,
highPrioQueued))
# set maximum number of jobs to be submitted
tmpRemainingSlot = int(nRunning*threshold-nNotRun)
if tmpRemainingSlot < nJobsInBunchMin:
# use the lower limit to avoid creating too many _sub/_dis datasets
nJobsInBunch = nJobsInBunchMin
else:
if workQueue.queue_name in ['evgensimul']:
# use higher limit for evgensimul
if tmpRemainingSlot < nJobsInBunchMaxES:
nJobsInBunch = tmpRemainingSlot
else:
nJobsInBunch = nJobsInBunchMaxES
else:
#.........这里部分代码省略.........
示例6: runImpl
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
#.........这里部分代码省略.........
taskSpec.setErrDiag('failed to insert files for {0}. {1}'.format(datasetSpec.datasetName,
diagMap['errMsg']))
allUpdated = False
taskBroken = True
break
elif retDB == None:
# the dataset is locked by another or status is not applicable
allUpdated = False
tmpLog.info('escape since task or dataset is locked')
break
elif missingFileList != []:
# files are missing
tmpErrStr = '{0} files missing in {1}'.format(len(missingFileList),datasetSpec.datasetName)
tmpLog.info(tmpErrStr)
taskSpec.setErrDiag(tmpErrStr)
allUpdated = False
taskOnHold = True
missingMap[datasetSpec.datasetName] = {'datasetSpec':datasetSpec,
'missingFiles':missingFileList}
else:
# reduce the number of files to be read
if taskParamMap.has_key('nFiles'):
if datasetSpec.isMaster():
taskParamMap['nFiles'] -= nFilesUnique
# reduce the number of files for scout
if useScout:
nChunksForScout = diagMap['nChunksForScout']
# number of master input files
if datasetSpec.isMaster():
checkedMaster = True
nFilesMaster += nFilesUnique
# running task
if diagMap['isRunningTask']:
runningTask = True
# no activated pending input for noWait
if noWaitParent and diagMap['nActivatedPending'] == 0 and not (useScout and nChunksForScout == 0):
tmpErrStr = 'insufficient inputs are ready'
tmpLog.info(tmpErrStr)
taskSpec.setErrDiag(tmpErrStr)
taskOnHold = True
setFrozenTime = False
break
tmpLog.info('end loop')
# no master input
if not taskOnHold and not taskBroken and allUpdated and nFilesMaster == 0 and checkedMaster:
tmpErrStr = 'no master input files. input dataset is empty'
tmpLog.error(tmpErrStr)
taskSpec.setErrDiag(tmpErrStr,None)
if taskSpec.allowEmptyInput() or noWaitParent:
taskOnHold = True
else:
taskBroken = True
# update task status
if taskBroken:
# task is broken
taskSpec.status = 'tobroken'
tmpMsg = 'set task.status={0}'.format(taskSpec.status)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
allRet = self.taskBufferIF.updateTaskStatusByContFeeder_JEDI(jediTaskID,taskSpec,pid=self.pid)
# change task status unless the task is running
if not runningTask:
if taskOnHold:
if not noWaitParent:
# initialize task generator
taskGenerator = TaskGenerator(taskSpec.vo,taskSpec.prodSourceLabel)
tmpStat = taskGenerator.initializeMods(self.taskBufferIF,
self.ddmIF.getInterface(taskSpec.vo))
if not tmpStat:
tmpErrStr = 'failed to initialize TaskGenerator'
tmpLog.error(tmpErrStr)
taskSpec.status = 'tobroken'
taskSpec.setErrDiag(tmpErrStr)
else:
# make parent tasks if necessary
tmpLog.info('make parent tasks with {0} (if necessary)'.format(taskGenerator.getClassName(taskSpec.vo,
taskSpec.prodSourceLabel)))
tmpStat = taskGenerator.doGenerate(taskSpec,taskParamMap,missingFilesMap=missingMap)
if tmpStat == Interaction.SC_FATAL:
# failed to make parent tasks
taskSpec.status = 'tobroken'
tmpLog.error('failed to make parent tasks')
# go to pending state
if not taskSpec.status in ['broken','tobroken']:
taskSpec.setOnHold()
tmpMsg = 'set task.status={0}'.format(taskSpec.status)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
allRet = self.taskBufferIF.updateTaskStatusByContFeeder_JEDI(jediTaskID,taskSpec,pid=self.pid,setFrozenTime=setFrozenTime)
elif allUpdated:
# all OK
allRet,newTaskStatus = self.taskBufferIF.updateTaskStatusByContFeeder_JEDI(jediTaskID,getTaskStatus=True,
pid=self.pid)
tmpMsg = 'set task.status={0}'.format(newTaskStatus)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
tmpLog.info('done')
except:
errtype,errvalue = sys.exc_info()[:2]
logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
示例7: runImpl
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
#.........这里部分代码省略.........
skipMsgList.append(tmpMsg)
else:
newNucleusList[tmpNucleus] = tmpNucleusSpec
if len(newNucleusList) > 0:
nucleusList = newNucleusList
for tmpMsg in skipMsgList:
tmpLog.info(tmpMsg)
else:
tmpLog.info(' disable data locality check since no nucleus has input data')
tmpLog.info('{0} candidates passed data check'.format(len(nucleusList)))
if nucleusList == {}:
tmpLog.error('no candidates')
taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
self.sendLogMessage(tmpLog)
continue
######################################
# weight
self.prioRW.acquire()
nucleusRW = self.prioRW[taskSpec.currentPriority]
self.prioRW.release()
totalWeight = 0
nucleusweights = []
for tmpNucleus,tmpNucleusSpec in nucleusList.iteritems():
if not tmpNucleus in nucleusRW:
nucleusRW[tmpNucleus] = 0
wStr = '1'
# with RW
if tmpNucleus in nucleusRW and nucleusRW[tmpNucleus] >= cutOffRW:
weight = 1 / float(nucleusRW[tmpNucleus])
wStr += '/( RW={0} )'.format(nucleusRW[tmpNucleus])
else:
weight = 1
wStr += '/(1 : RW={0}<{1})'.format(nucleusRW[tmpNucleus],cutOffRW)
# with data
if availableData != {}:
if availableData[tmpNucleus]['tot_size'] > 0:
weight *= float(availableData[tmpNucleus]['ava_size_any'])
weight /= float(availableData[tmpNucleus]['tot_size'])
wStr += '* ( available_input_size_DISKTAPE={0} )'.format(availableData[tmpNucleus]['ava_size_any'])
wStr += '/ ( total_input_size={0} )'.format(availableData[tmpNucleus]['tot_size'])
# negative weight for tape
if availableData[tmpNucleus]['ava_size_any'] > availableData[tmpNucleus]['ava_size_disk']:
weight *= negWeightTape
wStr += '*( weight_TAPE={0} )'.format(negWeightTape)
# fraction of free space
if tmpNucleus in fractionFreeSpace:
try:
tmpFrac = float(fractionFreeSpace[tmpNucleus]['free']) / \
float(fractionFreeSpace[tmpNucleus]['total'])
weight *= tmpFrac
wStr += '*( free_space={0} )/( total_space={1} )'.format(fractionFreeSpace[tmpNucleus]['free'],
fractionFreeSpace[tmpNucleus]['total'])
except:
pass
tmpLog.info(' use nucleus={0} weight={1} {2} criteria=+use'.format(tmpNucleus,weight,wStr))
totalWeight += weight
nucleusweights.append((tmpNucleus,weight))
tmpLog.info('final {0} candidates'.format(len(nucleusList)))
######################################
# final selection
tgtWeight = random.uniform(0,totalWeight)
candidateNucleus = None
for tmpNucleus,weight in nucleusweights:
tgtWeight -= weight
if tgtWeight <= 0:
candidateNucleus = tmpNucleus
break
if candidateNucleus == None:
candidateNucleus = nucleusweights[-1][0]
######################################
# update
nucleusSpec = nucleusList[candidateNucleus]
# get output/log datasets
tmpStat,tmpDatasetSpecs = self.taskBufferIF.getDatasetsWithJediTaskID_JEDI(taskSpec.jediTaskID,
['output','log'])
# get destinations
retMap = {taskSpec.jediTaskID: AtlasBrokerUtils.getDictToSetNucleus(nucleusSpec,tmpDatasetSpecs)}
tmpRet = self.taskBufferIF.setCloudToTasks_JEDI(retMap)
tmpLog.info(' set nucleus={0} with {1} criteria=+set'.format(candidateNucleus,tmpRet))
self.sendLogMessage(tmpLog)
if tmpRet:
tmpMsg = 'set task.status=ready'
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
# update RW table
self.prioRW.acquire()
for prio,rwMap in self.prioRW.iteritems():
if prio > taskSpec.currentPriority:
continue
if candidateNucleus in rwMap:
rwMap[candidateNucleus] += taskRW
else:
rwMap[candidateNucleus] = taskRW
self.prioRW.release()
except:
errtype,errvalue = sys.exc_info()[:2]
errMsg = '{0}.runImpl() failed with {1} {2} '.format(self.__class__.__name__,errtype.__name__,errvalue)
errMsg += 'lastJediTaskID={0} '.format(lastJediTaskID)
errMsg += traceback.format_exc()
logger.error(errMsg)
示例8: runImpl
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
#.........这里部分代码省略.........
or errtype == Interaction.JEDITemporaryError:
if impl.taskSpec.noWaitParent() or parentState == 'running':
tmpErrStr = 'pending until parent produces input'
setFrozenTime=False
elif errtype == Interaction.JEDITemporaryError:
tmpErrStr = 'pending due to DDM problem. {0}'.format(errvalue)
setFrozenTime=True
else:
tmpErrStr = 'pending until input is staged'
setFrozenTime=True
impl.taskSpec.status = taskStatus
impl.taskSpec.setOnHold()
impl.taskSpec.setErrDiag(tmpErrStr)
# not to update some task attributes
impl.taskSpec.resetRefinedAttrs()
tmpLog.info(tmpErrStr)
self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
oldStatus=[taskStatus],
insertUnknown=impl.unknownDatasetList,
setFrozenTime=setFrozenTime)
continue
else:
errStr = 'failed to refine task with {0}:{1}'.format(errtype.__name__,errvalue)
tmpLog.error(errStr)
tmpStat = Interaction.SC_FAILED
# register
if tmpStat != Interaction.SC_SUCCEEDED:
tmpLog.error('failed to refine the task')
if impl == None or impl.taskSpec == None:
tmpTaskSpec = JediTaskSpec()
tmpTaskSpec.jediTaskID = jediTaskID
else:
tmpTaskSpec = impl.taskSpec
tmpTaskSpec.status = 'tobroken'
if errStr != '':
tmpTaskSpec.setErrDiag(errStr,True)
self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':tmpTaskSpec.jediTaskID},oldStatus=[taskStatus])
else:
tmpLog.info('registering')
# fill JEDI tables
try:
# enable protection against task duplication
if taskParamMap.has_key('uniqueTaskName') and taskParamMap['uniqueTaskName'] and \
not impl.taskSpec.checkPreProcessed():
uniqueTaskName = True
else:
uniqueTaskName = False
strTaskParams = None
if impl.updatedTaskParams != None:
strTaskParams = RefinerUtils.encodeJSON(impl.updatedTaskParams)
if taskStatus in ['registered', 'staged']:
# unset pre-process flag
if impl.taskSpec.checkPreProcessed():
impl.taskSpec.setPostPreProcess()
# full registration
tmpStat,newTaskStatus = self.taskBufferIF.registerTaskInOneShot_JEDI(jediTaskID,impl.taskSpec,
impl.inMasterDatasetSpec,
impl.inSecDatasetSpecList,
impl.outDatasetSpecList,
impl.outputTemplateMap,
impl.jobParamsTemplate,
strTaskParams,
impl.unmergeMasterDatasetSpec,
impl.unmergeDatasetSpecMap,
uniqueTaskName,
taskStatus)
if not tmpStat:
tmpErrStr = 'failed to register the task to JEDI in a single shot'
tmpLog.error(tmpErrStr)
impl.taskSpec.status = newTaskStatus
impl.taskSpec.setErrDiag(tmpErrStr,True)
self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
oldStatus=[taskStatus])
tmpMsg = 'set task_status={0}'.format(newTaskStatus)
tmpLog.info(tmpMsg)
tmpLog.sendMsg(tmpMsg,self.msgType)
else:
# disable scouts if previous attempt didn't use it
if not impl.taskSpec.useScout(splitRule):
impl.taskSpec.setUseScout(False)
# disallow to reset some attributes
for attName in ['ramCount', 'walltime', 'cpuTime', 'startTime']:
impl.taskSpec.resetChangedAttr(attName)
# update task with new params
self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
oldStatus=[taskStatus])
# appending for incremental execution
tmpStat = self.taskBufferIF.appendDatasets_JEDI(jediTaskID,impl.inMasterDatasetSpec,
impl.inSecDatasetSpecList)
if not tmpStat:
tmpLog.error('failed to append datasets for incexec')
except:
errtype,errvalue = sys.exc_info()[:2]
tmpErrStr = 'failed to register the task to JEDI with {0}:{1}'.format(errtype.__name__,errvalue)
tmpLog.error(tmpErrStr)
else:
tmpLog.info('done')
except:
errtype,errvalue = sys.exc_info()[:2]
logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
示例9: toBeThrottled
# 需要导入模块: from pandajedi.jedicore.MsgWrapper import MsgWrapper [as 别名]
# 或者: from pandajedi.jedicore.MsgWrapper.MsgWrapper import sendMsg [as 别名]
#.........这里部分代码省略.........
# get cap
# set number of jobs to be submitted
if configQueueCap is None:
self.setMaxNumJobs(nJobsInBunch / nParallel)
else:
self.setMaxNumJobs(configQueueCap / nParallelCap)
# get total walltime
totWalltime = self.taskBufferIF.getTotalWallTime_JEDI(vo, prodSourceLabel, workQueue, resource_name, cloudName)
# log the current situation and limits
tmpLog.info("{0} nQueueLimit={1} nRunCap={2} nQueueCap={3}".format(msgHeader, nQueueLimit,
configRunningCap, configQueueCap))
tmpLog.info("{0} at global share level: nQueued={1} nDefine={2} nRunning={3}".format(msgHeader,
nNotRun_gs + nDefine_gs,
nDefine_gs, nRunning_gs))
tmpLog.info("{0} at resource type level: nQueued_rt={1} nDefine_rt={2} nRunning_rt={3} totWalltime={4}".format(msgHeader,
nNotRun_rt + nDefine_rt,
nDefine_rt, nRunning_rt,
totWalltime))
# check number of jobs when high priority jobs are not waiting. test jobs are sent without throttling
limitPriority = False
if workQueue.queue_name not in non_rt_wqs \
and nRunning_rt == 0 and (nNotRun_queuelimit + nDefine_queuelimit) > nQueueLimit \
and (totWalltime is None or totWalltime > minTotalWalltime):
limitPriority = True
if not highPrioQueued:
# pilot is not running or DDM has a problem
msgBody = "SKIP no running and enough nQueued_queuelimit({0})>{1} totWalltime({2})>{3} ".format(nNotRun_queuelimit + nDefine_queuelimit,
nQueueLimit, totWalltime, minTotalWalltime)
tmpLog.warning("{0} {1}".format(msgHeader, msgBody))
tmpLog.sendMsg("{0} {1}".format(msgHeader, msgBody),self.msgType, msgLevel='warning', escapeChar=True)
return self.retMergeUnThr
elif workQueue.queue_name in non_rt_wqs \
and nRunning_gs == 0 and (nNotRun_queuelimit + nDefine_queuelimit) > nQueueLimit:
limitPriority = True
if not highPrioQueued:
# pilot is not running or DDM has a problem
msgBody = "SKIP no running and enough nQueued_queuelimit({0})>{1} totWalltime({2})>{3} ".format(nNotRun_queuelimit + nDefine_queuelimit,
nQueueLimit, totWalltime, minTotalWalltime)
tmpLog.warning("{0} {1}".format(msgHeader, msgBody))
tmpLog.sendMsg("{0} {1}".format(msgHeader, msgBody),self.msgType, msgLevel='warning', escapeChar=True)
return self.retMergeUnThr
elif workQueue.queue_name not in non_rt_wqs and nRunning_rt != 0 \
and float(nNotRun_rt + nDefine_rt) / float(nRunning_rt) > threshold and \
(nNotRun_queuelimit + nDefine_queuelimit) > nQueueLimit and (totWalltime is None or totWalltime > minTotalWalltime):
limitPriority = True
if not highPrioQueued:
# enough jobs in Panda
msgBody = "SKIP nQueued_rt({0})/nRunning_rt({1})>{2} & nQueued_queuelimit({3})>{4} totWalltime({5})>{6}".format(nNotRun_rt + nDefine_rt, nRunning_rt,
threshold, nNotRun_queuelimit + nDefine_queuelimit,
nQueueLimit, totWalltime,
minTotalWalltime)
tmpLog.warning("{0} {1}".format(msgHeader, msgBody))
tmpLog.sendMsg("{0} {1}".format(msgHeader, msgBody), self.msgType, msgLevel='warning', escapeChar=True)
return self.retMergeUnThr
elif workQueue.queue_name in non_rt_wqs and nRunning_gs != 0 \
and float(nNotRun_gs + nDefine_gs) / float(nRunning_gs) > threshold and \
(nNotRun_queuelimit + nDefine_queuelimit) > nQueueLimit:
limitPriority = True
if not highPrioQueued: