本文整理汇总了Python中DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB.getJobAttributes方法的典型用法代码示例。如果您正苦于以下问题:Python JobDB.getJobAttributes方法的具体用法?Python JobDB.getJobAttributes怎么用?Python JobDB.getJobAttributes使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB的用法示例。
在下文中一共展示了JobDB.getJobAttributes方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Matcher
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
class Matcher( object ):
""" Logic for matching
"""
def __init__( self, pilotAgentsDB = None, jobDB = None, tqDB = None, jlDB = None, opsHelper = None ):
    """ Wire up the databases and helpers the matcher works with.

    Every collaborator can be injected by the caller; an argument that is
    missing (or falsy) is replaced by a freshly constructed default object.

    :param pilotAgentsDB: pilot agents DB object, default PilotAgentsDB()
    :param jobDB: job DB object, default JobDB()
    :param tqDB: task queue DB object, default TaskQueueDB()
    :param jlDB: job logging DB object, default JobLoggingDB()
    :param opsHelper: operations helper, default Operations()
    """
    self.pilotAgentsDB = pilotAgentsDB or PilotAgentsDB()
    self.jobDB = jobDB or JobDB()
    self.tqDB = tqDB or TaskQueueDB()
    self.jlDB = jlDB or JobLoggingDB()
    self.opsHelper = opsHelper or Operations()

    self.log = gLogger.getSubLogger( "Matcher" )
    # The limiter shares the job DB and operations helper selected above
    self.limiter = Limiter( jobDB = self.jobDB, opsHelper = self.opsHelper )
def selectJob( self, resourceDescription, credDict ):
""" Main job selection function to find the highest priority job matching the resource capacity
"""
startTime = time.time()
resourceDict = self._getResourceDict( resourceDescription, credDict )
negativeCond = self.limiter.getNegativeCondForSite( resourceDict['Site'] )
result = self.tqDB.matchAndGetJob( resourceDict, negativeCond = negativeCond )
if not result['OK']:
return result
result = result['Value']
if not result['matchFound']:
self.log.info( "No match found" )
raise RuntimeError( "No match found" )
jobID = result['jobId']
resAtt = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup', 'Status'] )
if not resAtt['OK']:
raise RuntimeError( 'Could not retrieve job attributes' )
if not resAtt['Value']:
raise RuntimeError( "No attributes returned for job" )
if not resAtt['Value']['Status'] == 'Waiting':
self.log.error( 'Job matched by the TQ is not in Waiting state', str( jobID ) )
result = self.tqDB.deleteJob( jobID )
if not result[ 'OK' ]:
return result
raise RuntimeError( "Job %s is not in Waiting state" % str( jobID ) )
self._reportStatus( resourceDict, jobID )
result = self.jobDB.getJobJDL( jobID )
if not result['OK']:
raise RuntimeError( "Failed to get the job JDL" )
resultDict = {}
resultDict['JDL'] = result['Value']
resultDict['JobID'] = jobID
matchTime = time.time() - startTime
self.log.info( "Match time: [%s]" % str( matchTime ) )
gMonitor.addMark( "matchTime", matchTime )
# Get some extra stuff into the response returned
resOpt = self.jobDB.getJobOptParameters( jobID )
if resOpt['OK']:
for key, value in resOpt['Value'].items():
resultDict[key] = value
resAtt = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] )
if not resAtt['OK']:
raise RuntimeError( 'Could not retrieve job attributes' )
if not resAtt['Value']:
raise RuntimeError( 'No attributes returned for job' )
if self.opsHelper.getValue( "JobScheduling/CheckMatchingDelay", True ):
self.limiter.updateDelayCounters( resourceDict['Site'], jobID )
pilotInfoReportedFlag = resourceDict.get( 'PilotInfoReportedFlag', False )
if not pilotInfoReportedFlag:
self._updatePilotInfo( resourceDict )
self._updatePilotJobMapping( resourceDict, jobID )
resultDict['DN'] = resAtt['Value']['OwnerDN']
resultDict['Group'] = resAtt['Value']['OwnerGroup']
#.........这里部分代码省略.........
示例2: PilotStatusAgent
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
#.........这里部分代码省略.........
if not retVal['OK']:
self.log.error('Fail to retrieve Info for pilots', retVal['Message'])
return retVal
dbData = retVal['Value']
for pref in dbData:
if pref in pilotsToAccount:
if dbData[pref]['Status'] not in self.finalStateList:
dbData[pref]['Status'] = pilotsToAccount[pref]['Status']
dbData[pref]['DestinationSite'] = pilotsToAccount[pref]['DestinationSite']
dbData[pref]['LastUpdateTime'] = pilotsToAccount[pref]['StatusDate']
retVal = self.__addPilotsAccountingReport(dbData)
if not retVal['OK']:
self.log.error('Fail to retrieve Info for pilots', retVal['Message'])
return retVal
self.log.info("Sending accounting records...")
retVal = gDataStoreClient.commit()
if not retVal['OK']:
self.log.error("Can't send accounting reports", retVal['Message'])
else:
self.log.info("Accounting sent for %s pilots" % len(pilotsToAccount))
accountingSent = True
if not accountingFlag or accountingSent:
for pRef in pilotsToAccount:
pDict = pilotsToAccount[pRef]
self.log.verbose('Setting Status for %s to %s' % (pRef, pDict['Status']))
self.pilotDB.setPilotStatus(pRef,
pDict['Status'],
pDict['DestinationSite'],
pDict['StatusDate'],
conn=connection)
return S_OK()
def __addPilotsAccountingReport(self, pilotsData):
    """ Register one accounting record per pilot with the data store client.

    :param pilotsData: mapping of pilot reference to a dictionary of pilot
                       fields; the fields read are LastUpdateTime,
                       SubmissionTime, OwnerDN, OwnerGroup, DestinationSite,
                       GridType, Broker, Status, PilotID and (optionally) Jobs
    :return: S_OK() when every record was added, otherwise the first failed
             result returned by gDataStoreClient.addRegister
    """
    for pilotInfo in pilotsData.values():
        record = PilotAccounting()
        record.setEndTime(pilotInfo['LastUpdateTime'])
        record.setStartTime(pilotInfo['SubmissionTime'])

        # Resolve the owner; an unresolved DN is logged and accounted as 'unknown'
        userRes = CS.getUsernameForDN(pilotInfo['OwnerDN'])
        if userRes['OK']:
            userName = userRes['Value']
        else:
            userName = 'unknown'
            self.log.error("Can't determine username for dn:", pilotInfo['OwnerDN'])
        record.setValueByKey('User', userName)
        record.setValueByKey('UserGroup', pilotInfo['OwnerGroup'])

        # A failed or blank site lookup is accounted as 'Unknown'
        siteRes = getSiteForCE(pilotInfo['DestinationSite'])
        siteName = siteRes['Value'].strip() if siteRes['OK'] else ''
        record.setValueByKey('Site', siteName or 'Unknown')

        record.setValueByKey('GridCE', pilotInfo['DestinationSite'])
        record.setValueByKey('GridMiddleware', pilotInfo['GridType'])
        record.setValueByKey('GridResourceBroker', pilotInfo['Broker'])
        record.setValueByKey('GridStatus', pilotInfo['Status'])

        jobCount = len(pilotInfo['Jobs']) if 'Jobs' in pilotInfo else 0
        record.setValueByKey('Jobs', jobCount)

        self.log.verbose("Added accounting record for pilot %s" % pilotInfo['PilotID'])
        addRes = gDataStoreClient.addRegister(record)
        if not addRes['OK']:
            return addRes
    return S_OK()
def _killPilots(self, acc):
    """ Kill every pilot referenced by the keys of acc, in sorted order.

    A pilot is killed only when its info can be retrieved and carries a
    'Status' entry; every outcome is logged and nothing is returned.

    :param acc: mapping whose keys are pilot references
    """
    for pilotRef in sorted(acc.keys()):
        info = self.diracadmin.getPilotInfo(pilotRef)
        usable = info['OK'] and pilotRef in info['Value'] and 'Status' in info['Value'][pilotRef]
        if not usable:
            self.log.error("Failed to get pilot info", "%s : %s" % (pilotRef, str(info)))
            continue

        killRes = self.diracadmin.killPilot(str(pilotRef))
        if not killRes['OK']:
            self.log.error("Failed to delete pilot: ", "%s : %s" % (pilotRef, killRes['Message']))
            continue

        self.log.info("Successfully deleted: %s (Status : %s)" % (pilotRef, info['Value'][pilotRef]['Status']))
def _checkJobLastUpdateTime(self, joblist, StalledDays):
    """ Tell whether any job of joblist was updated after the stalled-time limit.

    The limit is Time.dateTime() minus StalledDays times Time.day. Jobs whose
    attributes cannot be read are logged and skipped.

    :param joblist: iterable of job IDs (each is converted with int())
    :param StalledDays: number of Time.day units subtracted to get the limit
    :return: True as soon as one job has a LastUpdateTime later than the
             limit, False otherwise
    """
    cutoff = Time.dateTime() - Time.day * StalledDays
    for jobID in joblist:
        attrRes = self.jobDB.getJobAttributes(int(jobID))
        if not attrRes['OK']:
            self.log.error("Error taking job info from DB", attrRes['Message'])
            continue

        attributes = attrRes['Value']
        if 'LastUpdateTime' not in attributes:
            continue

        lastUpdateTime = attributes['LastUpdateTime']
        if Time.fromString(lastUpdateTime) > cutoff:
            self.log.debug(
                'Since %s updates LastUpdateTime on %s this does not to need to be deleted.' %
                (str(jobID), str(lastUpdateTime)))
            # One recently updated job is enough, no need to look further
            return True
    return False
示例3: JobCleaningAgent
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
#.........这里部分代码省略.........
errorFlag = True
if not resultTQ['OK']:
gLogger.warn( 'Failed to remove job %d from TaskQueueDB' % jobID, result['Message'] )
errorFlag = True
if not resultLogDB['OK']:
gLogger.warn( 'Failed to remove job %d from JobLoggingDB' % jobID, result['Message'] )
errorFlag = True
if errorFlag:
error_count += 1
else:
count += 1
if self.throttlingPeriod:
time.sleep(self.throttlingPeriod)
else:
result = self.jobDB.removeJobFromDB( jobList )
if not result['OK']:
gLogger.error('Failed to delete %d jobs from JobDB' % len(jobList) )
else:
gLogger.info('Deleted %d jobs from JobDB' % len(jobList) )
for jobID in jobList:
resultTQ = self.taskQueueDB.deleteJob( jobID )
if not resultTQ['OK']:
gLogger.warn( 'Failed to remove job %d from TaskQueueDB' % jobID, resultTQ['Message'] )
error_count += 1
else:
count += 1
result = self.jobLoggingDB.deleteJob( jobList )
if not result['OK']:
gLogger.error('Failed to delete %d jobs from JobLoggingDB' % len(jobList) )
else:
gLogger.info('Deleted %d jobs from JobLoggingDB' % len(jobList) )
if count > 0 or error_count > 0 :
gLogger.info( 'Deleted %d jobs from JobDB, %d errors' % ( count, error_count ) )
return S_OK()
def deleteJobOversizedSandbox( self, jobIDList ):
    """ Delete the job oversized sandbox files from storage elements

    For every job the 'OutputSandboxLFN' parameter is looked up; each LFN
    found gets a removal request created with the job owner's credentials.

    :param jobIDList: iterable of job IDs to process
    :return: S_OK( {'Successful': {...}, 'Failed': {...} } ), both keyed by
             job ID. Successful values are the LFN scheduled for removal or
             the text 'No oversized sandbox found'; Failed values are the LFN
             whose removal could not be scheduled. Jobs whose parameter
             lookup failed are only logged and appear in neither dictionary.
    """
    failed = {}
    successful = {}
    lfnDict = {}

    for jobID in jobIDList:
        result = self.jobDB.getJobParameter( jobID, 'OutputSandboxLFN' )
        if not result['OK']:
            gLogger.warn( 'Error interrogating JobDB: %s' % result['Message'] )
            continue
        lfn = result['Value']
        if lfn:
            lfnDict[lfn] = jobID
        else:
            successful[jobID] = 'No oversized sandbox found'

    if not lfnDict:
        return S_OK( {'Successful':successful, 'Failed':failed} )

    # Schedule removal of the LFNs now
    for lfn, jobID in lfnDict.items():
        result = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] )
        # A failed query and an empty attribute set are both treated as failures
        if not result['OK'] or not result['Value']:
            failed[jobID] = lfn
            continue

        ownerDN = result['Value']['OwnerDN']
        ownerGroup = result['Value']['OwnerGroup']
        result = self.__setRemovalRequest( lfn, ownerDN, ownerGroup )
        if not result['OK']:
            failed[jobID] = lfn
        else:
            successful[jobID] = lfn

    result = {'Successful':successful, 'Failed':failed}
    return S_OK( result )
def __setRemovalRequest( self, lfn, ownerDN, ownerGroup ):
    """ Submit a 'RemoveFile' request for one LFN on behalf of the given owner.

    :param lfn: logical file name to remove; its stripped base name is used
                to build the request name
    :param ownerDN: DN stored as the request owner
    :param ownerGroup: group stored as the request owner group
    :return: whatever ReqClient().putRequest() returns for the new request
    """
    removalRequest = Request()
    removalRequest.OwnerDN = ownerDN
    removalRequest.OwnerGroup = ownerGroup
    baseName = os.path.basename( lfn ).strip()
    removalRequest.RequestName = baseName + '_removal_request.xml'
    removalRequest.SourceComponent = 'JobCleaningAgent'

    # Single operation carrying a single file
    removalOp = Operation()
    removalOp.Type = 'RemoveFile'
    targetFile = File()
    targetFile.LFN = lfn
    removalOp.addFile( targetFile )

    removalRequest.addOperation( removalOp )
    return ReqClient().putRequest( removalRequest )
示例4: StalledJobAgent
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
#.........这里部分代码省略.........
self.log.warn( result['Message'] )
return S_OK( 'NoPilot' )
self.log.error( 'Failed to get pilot information', result['Message'] )
return S_ERROR( 'Failed to get the pilot status' )
pilotStatus = result['Value'][pilotReference]['Status']
return S_OK( pilotStatus )
#############################################################################
def __getStalledJob( self, job, stalledTime ):
    """ Decide whether a job is stalled from the age of its latest update.

    The latest update time comes from __getLatestUpdateTime and is compared
    with the current toEpoch() value.

    :param job: job identifier
    :param stalledTime: maximum tolerated age of the latest update (the log
                        messages report the age in seconds)
    :return: S_OK( 'Stalled' ) when the age exceeds stalledTime, S_ERROR when
             the job is considered running, or the failed result of
             __getLatestUpdateTime
    """
    latest = self.__getLatestUpdateTime( job )
    if not latest['OK']:
        return latest

    age = toEpoch() - latest['Value']
    self.log.verbose( '(CurrentTime-LastUpdate) = %s secs' % age )

    if not age > stalledTime:
        return S_ERROR( 'Job %s is running and will be ignored' % job )

    self.log.info( 'Job %s is identified as stalled with last update > %s secs ago' % ( job, age ) )
    return S_OK( 'Stalled' )
#############################################################################
def __getLatestUpdateTime( self, job ):
    """ Return the later of the job's HeartBeatTime and LastUpdateTime.

    Both attributes are read from the job DB and converted with
    toEpoch( fromString( ... ) ); an empty value or the text 'None' counts
    as missing.

    :param job: job identifier
    :return: S_OK( epoch value ) for the most recent of the two times,
             S_ERROR when the attributes cannot be read or both are missing
    """
    result = self.jobDB.getJobAttributes( job, ['HeartBeatTime', 'LastUpdateTime'] )
    queryOK = result['OK']
    if not queryOK:
        self.log.error( 'Failed to get job attributes', result['Message'] )
    if not ( queryOK and result['Value'] ):
        self.log.error( 'Could not get attributes for job', '%s' % job )
        return S_ERROR( 'Could not get attributes for job' )

    self.log.verbose( result )
    attributes = result['Value']

    latest = 0
    heartBeat = attributes['HeartBeatTime']
    if heartBeat and heartBeat != 'None':
        latest = toEpoch( fromString( heartBeat ) )
    else:
        self.log.verbose( 'HeartBeatTime is null for job %s' % job )

    lastUpdateStamp = attributes['LastUpdateTime']
    if lastUpdateStamp and lastUpdateStamp != 'None':
        candidate = toEpoch( fromString( lastUpdateStamp ) )
        # Keep whichever of the two times is the most recent
        if latest < candidate:
            latest = candidate
    else:
        self.log.verbose( 'LastUpdateTime is null for job %s' % job )

    if not latest:
        return S_ERROR( 'LastUpdate and HeartBeat times are null for job %s' % job )

    self.log.verbose( 'Latest update time from epoch for job %s is %s' % ( job, latest ) )
    return S_OK( latest )
#############################################################################
def __updateJobStatus( self, job, status, minorstatus = None ):
""" This method updates the job status in the JobDB, this should only be
used to fail jobs due to the optimizer chain.
"""
self.log.verbose( "self.jobDB.setJobAttribute(%s,'Status','%s',update=True)" % ( job, status ) )
示例5: PilotStatusAgent
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
#.........这里部分代码省略.........
isChild = True
if status == "Running":
# Pilots can be in Running state for too long, due to bugs in the WMS
if statusDate:
statusTime = Time.fromString( statusDate )
delta = Time.dateTime() - statusTime
if delta > 4 * Time.day:
self.log.info( 'Setting pilot status to Deleted after 4 days in Running' )
status = "Deleted"
statusDate = statusTime + 4 * Time.day
elif submittedDate:
statusTime = Time.fromString( submittedDate )
delta = Time.dateTime() - statusTime
if delta > 7 * Time.day:
self.log.info( 'Setting pilot status to Deleted more than 7 days after submission still in Running' )
status = "Deleted"
statusDate = statusTime + 7 * Time.day
childRefs = []
childDicts = {}
if isParent:
for subjob in List.fromChar( job, ' Status info for the Job :' )[1:]:
chRef = List.fromChar( subjob, '\n' )[0].strip()
childDict = self.__parseJobStatus( subjob, gridType )
childRefs.append( chRef )
childDicts[chRef] = childDict
return { 'Status': status,
'DestinationSite': destination,
'StatusDate': statusDate,
'isChild': isChild,
'isParent': isParent,
'ParentRef': False,
'FinalStatus' : status in self.finalStateList,
'ChildRefs' : childRefs,
'ChildDicts' : childDicts }
def __addPilotsAccountingReport( self, pilotsData ):
    """ Register one accounting record per pilot with the data store client.

    :param pilotsData: mapping of pilot reference to a dictionary of pilot
                       fields; the fields read are LastUpdateTime,
                       SubmissionTime, OwnerDN, OwnerGroup, DestinationSite,
                       GridType, Broker, Status, PilotID and (optionally) Jobs
    :return: S_OK() when every record was added, otherwise the first failed
             result returned by gDataStoreClient.addRegister
    """
    for pilotInfo in pilotsData.values():
        record = PilotAccounting()
        record.setEndTime( pilotInfo[ 'LastUpdateTime' ] )
        record.setStartTime( pilotInfo[ 'SubmissionTime' ] )

        # Resolve the owner; an unresolved DN is logged and accounted as 'unknown'
        userRes = CS.getUsernameForDN( pilotInfo[ 'OwnerDN' ] )
        if userRes[ 'OK' ]:
            userName = userRes[ 'Value' ]
        else:
            userName = 'unknown'
            self.log.error( "Can't determine username for dn:", pilotInfo[ 'OwnerDN' ] )
        record.setValueByKey( 'User', userName )
        record.setValueByKey( 'UserGroup', pilotInfo[ 'OwnerGroup' ] )

        # A failed or blank site lookup is accounted as 'Unknown'
        siteRes = getSiteForCE( pilotInfo[ 'DestinationSite' ] )
        siteName = siteRes[ 'Value' ].strip() if siteRes[ 'OK' ] else ''
        record.setValueByKey( 'Site', siteName or 'Unknown' )

        record.setValueByKey( 'GridCE', pilotInfo[ 'DestinationSite' ] )
        record.setValueByKey( 'GridMiddleware', pilotInfo[ 'GridType' ] )
        record.setValueByKey( 'GridResourceBroker', pilotInfo[ 'Broker' ] )
        record.setValueByKey( 'GridStatus', pilotInfo[ 'Status' ] )

        jobCount = len( pilotInfo[ 'Jobs' ] ) if 'Jobs' in pilotInfo else 0
        record.setValueByKey( 'Jobs', jobCount )

        self.log.verbose( "Added accounting record for pilot %s" % pilotInfo[ 'PilotID' ] )
        addRes = gDataStoreClient.addRegister( record )
        if not addRes[ 'OK' ]:
            return addRes
    return S_OK()
def _killPilots( self, acc ):
    """ Kill every pilot referenced by the keys of acc, in sorted order.

    A pilot is killed only when its info can be retrieved and carries a
    'Status' entry; every outcome is logged and nothing is returned.

    :param acc: mapping whose keys are pilot references
    """
    for i in sorted(acc.keys()):
        result = self.diracadmin.getPilotInfo( i )
        # Membership is tested with `in`: dict.has_key() is not available on
        # Python 3 dictionaries, while `in` works on both Python 2 and 3
        if result['OK'] and i in result['Value'] and 'Status' in result['Value'][i]:
            ret = self.diracadmin.killPilot( str(i) )
            if ret['OK']:
                self.log.info("Successfully deleted: %s (Status : %s)" % (i, result['Value'][i]['Status'] ) )
            else:
                self.log.error("Failed to delete %s : %s" % ( i, ret['Message']))
        else:
            self.log.error("Failed to get info. of %s : %s" % ( i, str(result)))
def _checkJobLastUpdateTime( self, joblist , StalledDays ):
    """ Tell whether any job of joblist was updated after the stalled-time limit.

    The limit is Time.dateTime() minus StalledDays times Time.day. Jobs whose
    attributes cannot be read are logged and skipped.

    :param joblist: iterable of job IDs (each is converted with int())
    :param StalledDays: number of Time.day units subtracted to get the limit
    :return: True as soon as one job has a LastUpdateTime later than the
             limit, False otherwise
    """
    timeLimitToConsider = Time.dateTime() - Time.day * StalledDays
    ret = False
    for JobID in joblist:
        result = self.jobDB.getJobAttributes(int(JobID))
        if result['OK']:
            # Membership is tested with `in`: dict.has_key() is not available
            # on Python 3 dictionaries, while `in` works on both Python 2 and 3
            if 'LastUpdateTime' in result['Value']:
                LastUpdateTime = result['Value']['LastUpdateTime']
                if Time.fromString(LastUpdateTime) > timeLimitToConsider:
                    ret = True
                    self.log.debug('Since '+str(JobID)+' updates LastUpdateTime on '+str(LastUpdateTime)+', this does not to need to be deleted.')
                    # One recently updated job is enough, stop looking
                    break
        else:
            self.log.error("Error taking job info. from DB:%s" % str( result['Message'] ) )
    return ret
示例6: StalledJobAgent
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
#.........这里部分代码省略.........
if "No pilots found" in result["Message"]:
self.log.warn(result["Message"])
return S_OK("NoPilot")
self.log.error("Failed to get pilot information", result["Message"])
return S_ERROR("Failed to get the pilot status")
pilotStatus = result["Value"][pilotReference]["Status"]
return S_OK(pilotStatus)
#############################################################################
def __getStalledJob(self, job, stalledTime):
    """ Decide whether a job is stalled from the age of its latest update.

    The latest update time comes from __getLatestUpdateTime and is compared
    with the current toEpoch() value.

    :param job: job identifier
    :param stalledTime: maximum tolerated age of the latest update (the log
                        messages report the age in seconds)
    :return: S_OK("Stalled") when the age exceeds stalledTime, S_ERROR when
             the job is considered running, or the failed result of
             __getLatestUpdateTime
    """
    latest = self.__getLatestUpdateTime(job)
    if not latest["OK"]:
        return latest

    age = toEpoch() - latest["Value"]
    self.log.verbose("(CurrentTime-LastUpdate) = %s secs" % age)

    if not age > stalledTime:
        return S_ERROR("Job %s is running and will be ignored" % job)

    self.log.info("Job %s is identified as stalled with last update > %s secs ago" % (job, age))
    return S_OK("Stalled")
#############################################################################
def __getLatestUpdateTime(self, job):
    """ Return the later of the job's HeartBeatTime and LastUpdateTime.

    Both attributes are read from the job DB and converted with
    toEpoch(fromString(...)); an empty value or the text "None" counts as
    missing.

    :param job: job identifier
    :return: S_OK(epoch value) for the most recent of the two times,
             S_ERROR when the attributes cannot be read or both are missing
    """
    result = self.jobDB.getJobAttributes(job, ["HeartBeatTime", "LastUpdateTime"])
    queryOK = result["OK"]
    if not queryOK:
        self.log.error("Failed to get job attributes", result["Message"])
    if not (queryOK and result["Value"]):
        self.log.error("Could not get attributes for job", "%s" % job)
        return S_ERROR("Could not get attributes for job")

    self.log.verbose(result)
    attributes = result["Value"]

    latest = 0
    heartBeat = attributes["HeartBeatTime"]
    if heartBeat and heartBeat != "None":
        latest = toEpoch(fromString(heartBeat))
    else:
        self.log.verbose("HeartBeatTime is null for job %s" % job)

    lastUpdateStamp = attributes["LastUpdateTime"]
    if lastUpdateStamp and lastUpdateStamp != "None":
        candidate = toEpoch(fromString(lastUpdateStamp))
        # Keep whichever of the two times is the most recent
        if latest < candidate:
            latest = candidate
    else:
        self.log.verbose("LastUpdateTime is null for job %s" % job)

    if not latest:
        return S_ERROR("LastUpdate and HeartBeat times are null for job %s" % job)

    self.log.verbose("Latest update time from epoch for job %s is %s" % (job, latest))
    return S_OK(latest)
#############################################################################
def __updateJobStatus(self, job, status, minorstatus=None):
""" This method updates the job status in the JobDB, this should only be
used to fail jobs due to the optimizer chain.
"""
self.log.verbose("self.jobDB.setJobAttribute(%s,'Status','%s',update=True)" % (job, status))
示例7: JobCleaningAgent
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
#.........这里部分代码省略.........
errorFlag = True
if not resultTQ['OK']:
gLogger.warn( 'Failed to remove job %d from TaskQueueDB' % jobID, result['Message'] )
errorFlag = True
if not resultLogDB['OK']:
gLogger.warn( 'Failed to remove job %d from JobLoggingDB' % jobID, result['Message'] )
errorFlag = True
if errorFlag:
error_count += 1
else:
count += 1
if self.throttlingPeriod:
time.sleep(self.throttlingPeriod)
else:
result = self.jobDB.removeJobFromDB( jobList )
if not result['OK']:
gLogger.error('Failed to delete %d jobs from JobDB' % len(jobList) )
else:
gLogger.info('Deleted %d jobs from JobDB' % len(jobList) )
for jobID in jobList:
resultTQ = self.taskQueueDB.deleteJob( jobID )
if not resultTQ['OK']:
gLogger.warn( 'Failed to remove job %d from TaskQueueDB' % jobID, resultTQ['Message'] )
error_count += 1
else:
count += 1
result = self.jobLoggingDB.deleteJob( jobList )
if not result['OK']:
gLogger.error('Failed to delete %d jobs from JobLoggingDB' % len(jobList) )
else:
gLogger.info('Deleted %d jobs from JobLoggingDB' % len(jobList) )
if count > 0 or error_count > 0 :
gLogger.info( 'Deleted %d jobs from JobDB, %d errors' % ( count, error_count ) )
return S_OK()
def deleteJobOversizedSandbox( self, jobIDList ):
    """ Schedule removal of the oversized output sandbox of each listed job.

    For every job the 'OutputSandboxLFN' parameter is looked up; each LFN
    found gets a removal request created with the job owner's credentials.

    :param jobIDList: iterable of job IDs to process
    :return: S_OK( {'Successful': {...}, 'Failed': {...} } ), both keyed by
             job ID. Successful values are the LFN scheduled for removal or
             the text 'No oversized sandbox found'; Failed values are the LFN
             whose removal could not be scheduled. Jobs whose parameter
             lookup failed are only logged and appear in neither dictionary.
    """
    failed = {}
    successful = {}
    lfnDict = {}

    for jobID in jobIDList:
        paramRes = self.jobDB.getJobParameter( jobID, 'OutputSandboxLFN' )
        if not paramRes['OK']:
            gLogger.warn( 'Error interrogating JobDB: %s' % paramRes['Message'] )
            continue
        sandboxLFN = paramRes['Value']
        if not sandboxLFN:
            successful[jobID] = 'No oversized sandbox found'
            continue
        lfnDict[sandboxLFN] = jobID

    # Both dictionaries keep being filled below; the report refers to them
    report = {'Successful': successful, 'Failed': failed}
    if not lfnDict:
        return S_OK( report )

    # Schedule removal of the LFNs now
    for lfn, jobID in lfnDict.items():
        attrRes = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] )
        if not attrRes['OK'] or not attrRes['Value']:
            failed[jobID] = lfn
            continue

        owner = attrRes['Value']
        ownerDN = owner['OwnerDN']
        ownerGroup = owner['OwnerGroup']
        requestRes = self.__setRemovalRequest( lfn, ownerDN, ownerGroup )
        if requestRes['OK']:
            successful[jobID] = lfn
        else:
            failed[jobID] = lfn

    return S_OK( report )
def __setRemovalRequest( self, lfn, ownerDN, ownerGroup ):
    """ Submit a 'RemoveFile' request for one LFN on behalf of the given owner.

    :param lfn: logical file name to remove; its stripped base name is used
                to build the request name
    :param ownerDN: DN stored as the request owner
    :param ownerGroup: group stored as the request owner group
    :return: whatever ReqClient().putRequest() returns for the new request
    """
    removalRequest = Request()
    removalRequest.OwnerDN = ownerDN
    removalRequest.OwnerGroup = ownerGroup
    baseName = os.path.basename( lfn ).strip()
    removalRequest.RequestName = baseName + '_removal_request.xml'
    removalRequest.SourceComponent = 'JobCleaningAgent'

    # Single operation carrying a single file
    removalOp = Operation()
    removalOp.Type = 'RemoveFile'
    targetFile = File()
    targetFile.LFN = lfn
    removalOp.addFile( targetFile )

    removalRequest.addOperation( removalOp )
    return ReqClient().putRequest( removalRequest )
示例8: Limiter
# 需要导入模块: from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB [as 别名]
# 或者: from DIRAC.WorkloadManagementSystem.DB.JobDB.JobDB import getJobAttributes [as 别名]
#.........这里部分代码省略.........
attLimits = result['Value']
try:
attLimits = dict([(k, int(attLimits[k])) for k in attLimits])
except Exception as excp:
errMsg = "%s/%s has to contain numbers: %s" % (section, attName, str(excp))
self.log.error(errMsg)
return S_ERROR(errMsg)
stuffDict[attName] = attLimits
self.csDictCache.add(section, 300, stuffDict)
return S_OK(stuffDict)
def __getRunningCondition(self, siteName):
    """ Build the negative conditions imposed by the running-job limits of a site.

    The limits are read from the site's section below the running-limit
    section; for each limited attribute the number of jobs per value in
    Running, Matched or Stalled status at the site is compared with the limit.

    :param siteName: name of the site
    :return: S_OK(dict) mapping an attribute name to the list of values whose
             limit is reached (e.g. {'JobType': ['Merge']}), or the failed
             result of the configuration or counters lookup
    """
    siteSection = "%s/%s" % (self.__runningLimitSection, siteName)
    csResult = self.__extractCSData(siteSection)
    if not csResult['OK']:
        return csResult

    # limitsDict is something like { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
    limitsDict = csResult['Value']
    if not limitsDict:
        return S_OK({})

    # Check if the site exceeding the given limits
    negCond = {}
    for attName, attLimits in limitsDict.items():
        if attName not in self.jobDB.jobAttributeNames:
            self.log.error("Attribute %s does not exist. Check the job limits" % attName)
            continue

        # Counters are cached per site and attribute under this key
        cacheKey = "Running:%s:%s" % (siteName, attName)
        counts = self.condCache.get(cacheKey)
        if not counts:
            statusCond = {'Site': siteName,
                          'Status': ['Running', 'Matched', 'Stalled']}
            dbResult = self.jobDB.getCounters('Jobs', [attName], statusCond)
            if not dbResult['OK']:
                return dbResult
            counts = {entry[0][attName]: entry[1] for entry in dbResult['Value']}
            self.condCache.add(cacheKey, 10, counts)

        for attValue, limit in attLimits.items():
            running = counts.get(attValue, 0)
            if running >= limit:
                self.log.verbose('Job Limit imposed at %s on %s/%s=%d,'
                                 ' %d jobs already deployed' % (siteName, attName, attValue, limit, running))
                negCond.setdefault(attName, []).append(attValue)

    # negCond is something like : {'JobType': ['Merge']}
    return S_OK(negCond)
def updateDelayCounters(self, siteName, jid):
    """ Record matching delays for the attributes of a job matched at a site.

    The delays are read from the site's section below the matching-delay
    section. For every configured attribute whose value for this job has a
    delay defined, an entry (attribute name, value) is added to the site's
    delay cache with that delay as third argument.

    :param siteName: name of the site where the job was matched
    :param jid: job ID
    :return: S_OK() on success (also when no delay is configured), or the
             failed result of the configuration or job attributes lookup
    """
    # Get the info from the CS
    siteSection = "%s/%s" % (self.__matchingDelaySection, siteName)
    result = self.__extractCSData(siteSection)
    if not result['OK']:
        return result
    delayDict = result['Value']
    # delayDict is something like { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
    if not delayDict:
        return S_OK()

    # Only ask the JobDB for attributes it knows about
    attNames = []
    for attName in delayDict:
        if attName not in self.jobDB.jobAttributeNames:
            self.log.error("Attribute %s does not exist in the JobDB. Please fix it!" % attName)
        else:
            attNames.append(attName)
    result = self.jobDB.getJobAttributes(jid, attNames)
    if not result['OK']:
        self.log.error("While retrieving attributes coming from %s: %s" % (siteSection, result['Message']))
        return result
    atts = result['Value']

    # Create the DictCache if not there
    if siteName not in self.delayMem:
        self.delayMem[siteName] = DictCache()

    # Update the counters
    delayCounter = self.delayMem[siteName]
    for attName, attValue in atts.items():
        # NOTE(review): the DB may hand back attributes that were not requested
        # (presumably when attNames ends up empty - confirm against JobDB);
        # those have no configured delay and must not raise a KeyError here.
        if attName not in delayDict:
            continue
        if attValue in delayDict[attName]:
            delayTime = delayDict[attName][attValue]
            self.log.notice("Adding delay for %s/%s=%s of %s secs" % (siteName, attName,
                                                                      attValue, delayTime))
            delayCounter.add((attName, attValue), delayTime)
    return S_OK()
def __getDelayCondition(self, siteName):
    """ Build the negative conditions coming from the matching delays of a site.

    :param siteName: name of the site
    :return: S_OK(dict) mapping each attribute name to the list of values
             found among the keys of the site's delay cache; the dictionary
             is empty when the site has no delay cache
    """
    negCond = {}
    if siteName in self.delayMem:
        # Cache keys are (attribute name, attribute value) pairs
        for attName, attValue in self.delayMem[siteName].getKeys():
            negCond.setdefault(attName, []).append(attValue)
    return S_OK(negCond)