當前位置: 首頁>>代碼示例>>Python>>正文


Python JobLoggingDB.JobLoggingDB類代碼示例

本文整理匯總了Python中DIRAC.WorkloadManagementSystem.DB.JobLoggingDB.JobLoggingDB的典型用法代碼示例。如果您正苦於以下問題:Python JobLoggingDB類的具體用法?Python JobLoggingDB怎麼用?Python JobLoggingDB使用的例子?那麼, 這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了JobLoggingDB類的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

  def __init__( self, pilotAgentsDB = None, jobDB = None, tqDB = None, jlDB = None, opsHelper = None ):
    """ Wire up the databases, helpers and logger used for matching.

    Every argument is optional. A falsy value (including the default None)
    is replaced by a freshly constructed default object.

    :param pilotAgentsDB: pilot agents DB object, defaults to PilotAgentsDB()
    :param jobDB: job DB object, defaults to JobDB()
    :param tqDB: task queue DB object, defaults to TaskQueueDB()
    :param jlDB: job logging DB object, defaults to JobLoggingDB()
    :param opsHelper: operations helper, defaults to Operations()
    """
    # An injected collaborator wins; otherwise build the default one.
    self.pilotAgentsDB = pilotAgentsDB or PilotAgentsDB()
    self.jobDB = jobDB or JobDB()
    self.tqDB = tqDB or TaskQueueDB()
    self.jlDB = jlDB or JobLoggingDB()
    self.opsHelper = opsHelper or Operations()

    self.log = gLogger.getSubLogger( "Matcher" )

    # The limiter is given the very same job DB and operations helper chosen above.
    self.limiter = Limiter( jobDB = self.jobDB, opsHelper = self.opsHelper )
開發者ID:Andrew-McNab-UK,項目名稱:DIRAC,代碼行數:28,代碼來源:Matcher.py

示例2: initialize

  def initialize( self ):
    """ Create the database handles and load the agent options.

    Sets PollingTime to 120, determines the production job types that are
    excluded from cleaning and fills self.removeStatusDelay with the
    per-status delays.

    NOTE(review): self.removeStatusDelay is written by key here, so it is
    assumed to exist already as a dict - confirm against the constructor.

    :return: S_OK()
    """
    self.am_setOption( "PollingTime", 120 )

    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )

    # The agent's own ProductionTypes option takes precedence; the Operations
    # value is only looked up when that option is empty.
    configuredTypes = self.am_getOption('ProductionTypes', [])
    self.prod_types = configuredTypes or Operations().getValue( 'Transformations/DataProcessing',
                                                                 ['MCSimulation', 'Merge'] )
    excludedTypes = ', '.join( self.prod_types )
    gLogger.info( "Will exclude the following Production types from cleaning %s" % excludedTypes )

    self.maxJobsAtOnce = self.am_getOption( 'MaxJobsAtOnce', 500 )
    self.jobByJob = self.am_getOption( 'JobByJob', False )
    self.throttlingPeriod = self.am_getOption('ThrottlingPeriod', 0.)

    # ( status, option name, default delay ) read in this order
    delayOptions = ( ( 'Done', 'RemoveStatusDelay/Done', 7 ),
                     ( 'Killed', 'RemoveStatusDelay/Killed', 7 ),
                     ( 'Failed', 'RemoveStatusDelay/Failed', 7 ),
                     ( 'Any', 'RemoveStatusDelay/Any', -1 ) )
    for status, optionName, defaultDelay in delayOptions:
      self.removeStatusDelay[status] = self.am_getOption( optionName, defaultDelay )

    return S_OK()
開發者ID:ltomassetti,項目名稱:DIRAC,代碼行數:25,代碼來源:JobCleaningAgent.py

示例3: initialize

  def initialize( self, jobDB = False, logDB = False ):
    """ Prepare the optimizer agent.

    :param jobDB: job DB object to use; a falsy value means "create a JobDB()"
    :param logDB: logging DB object to use; a falsy value means "create a JobLoggingDB()"
    :return: whatever self.initializeOptimizer() returns
    """
    self.jobDB = jobDB if jobDB else JobDB()
    self.logDB = logDB if logDB else JobLoggingDB()

    # The optimizer name is the agent name without a trailing "Agent"
    suffix = "Agent"
    agentName = self.am_getModuleParam( 'agentName' )
    if agentName.endswith( suffix ):
      agentName = agentName[ :-len( suffix ) ]
    self.am_setModuleParam( 'optimizerName', agentName )

    # Read the name back from the module parameters rather than reusing the local
    self.startingMinorStatus = self.am_getModuleParam( 'optimizerName' )
    self.startingMajorStatus = "Checking"
    self.failedStatus = self.am_getOption( "FailedJobStatus" , 'Failed' )
    self.requiredJobInfo = 'jdl'
    self.am_setOption( "PollingTime", 30 )

    return self.initializeOptimizer()
開發者ID:DIRACGrid-test,項目名稱:DIRAC,代碼行數:25,代碼來源:OptimizerModule.py

示例4: initialize

  def initialize( self ):
    """ Create the database handles and set the default polling time.

    Only logs a message when the 'Enable' option is switched off.

    :return: S_OK()
    """
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()
    self.am_setOption( 'PollingTime', 3600 )

    enabled = self.am_getOption( 'Enable', True )
    if not enabled:
      self.log.info( 'Stalled Job Agent running in disabled mode' )

    return S_OK()
開發者ID:DIRACGrid-test,項目名稱:DIRAC,代碼行數:9,代碼來源:StalledJobAgent.py

示例5: initialize

  def initialize( self ):
    """ Set the agent defaults and create the database handles.

    Sets PollingTime to 60, creates the JobDB, TaskQueueDB and JobLoggingDB
    objects and reads the ProductionTypes, MaxJobsAtOnce, JobByJob and
    ThrottlingPeriod options.

    :return: S_OK()
    """

    self.am_setOption( "PollingTime", 60 )
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )
    self.prod_types = self.am_getOption( 'ProductionTypes',
                                         ['DataReconstruction', 'DataStripping', 'MCSimulation', 'Merge', 'production'] )
    # str.join replaces string.join(): the module-level function exists in
    # Python 2 only, the method works on both Python 2 and 3.
    gLogger.info( 'Will exclude the following Production types from cleaning %s' % ( ', '.join( self.prod_types ) ) )
    self.maxJobsAtOnce = self.am_getOption( 'MaxJobsAtOnce', 200 )
    self.jobByJob = self.am_getOption( 'JobByJob', True )
    self.throttlingPeriod = self.am_getOption( 'ThrottlingPeriod', 0. )
    return S_OK()
開發者ID:hanyl,項目名稱:DIRAC,代碼行數:15,代碼來源:JobCleaningAgent.py

示例6: initialize

  def initialize( self ):
    """ Set the agent defaults and create the database handles.

    Sets PollingTime to 60, creates the JobDB, TaskQueueDB and JobLoggingDB
    objects, determines the production job types excluded from cleaning and
    reads the MaxJobsAtOnce, JobByJob and ThrottlingPeriod options.

    :return: S_OK()
    """

    self.am_setOption( "PollingTime", 60 )
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )

    # The agent's own ProductionTypes option wins; the Operations value is the fallback
    agentTSTypes = self.am_getOption('ProductionTypes', [])
    if agentTSTypes:
      self.prod_types = agentTSTypes
    else:
      self.prod_types = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    # str.join replaces string.join(): the module-level function exists in
    # Python 2 only, the method works on both Python 2 and 3.
    gLogger.info( 'Will exclude the following Production types from cleaning %s' % ( ', '.join( self.prod_types ) ) )
    self.maxJobsAtOnce = self.am_getOption( 'MaxJobsAtOnce', 100 )
    self.jobByJob = self.am_getOption( 'JobByJob', True )
    self.throttlingPeriod = self.am_getOption( 'ThrottlingPeriod', 0. )
    return S_OK()
開發者ID:afalabel,項目名稱:DIRAC,代碼行數:19,代碼來源:JobCleaningAgent.py

示例7: initialize

  def initialize(self, jobDB=None, logDB=None):
    """ Prepare the optimizer agent.

    :param jobDB: job DB object to use; None means "create a JobDB()"
    :param logDB: logging DB object to use; None means "create a JobLoggingDB()"
    :return: whatever self.initializeOptimizer() returns
    """
    if jobDB is None:
      jobDB = JobDB()
    self.jobDB = jobDB
    # dExit(1) is called when the job DB reports itself as not valid
    if not self.jobDB.isValid():
      dExit(1)

    if logDB is None:
      logDB = JobLoggingDB()
    self.logDB = logDB

    # The optimizer name is the agent name without a trailing "Agent"
    suffix = 'Agent'
    agentName = self.am_getModuleParam('agentName')
    if agentName.endswith(suffix):
      agentName = agentName[:-len(suffix)]
    self.am_setModuleParam('optimizerName', agentName)

    # Read the name back from the module parameters rather than reusing the local
    self.startingMinorStatus = self.am_getModuleParam('optimizerName')
    self.failedStatus = self.am_getOption("FailedJobStatus", 'Failed')
    self.am_setOption("PollingTime", 30)

    return self.initializeOptimizer()
開發者ID:DIRACGrid,項目名稱:DIRAC,代碼行數:19,代碼來源:OptimizerModule.py

示例8: Matcher

class Matcher( object ):
  """ Logic for matching
  """

  def __init__( self, pilotAgentsDB = None, jobDB = None, tqDB = None, jlDB = None, opsHelper = None ):
    """ Set up the databases, the operations helper, the logger and the limiter.

    Each argument may be used to inject an object; any falsy value
    (None included) makes the constructor build the default one instead.

    :param pilotAgentsDB: pilot agents DB object, defaults to PilotAgentsDB()
    :param jobDB: job DB object, defaults to JobDB()
    :param tqDB: task queue DB object, defaults to TaskQueueDB()
    :param jlDB: job logging DB object, defaults to JobLoggingDB()
    :param opsHelper: operations helper, defaults to Operations()
    """
    self.pilotAgentsDB = pilotAgentsDB if pilotAgentsDB else PilotAgentsDB()
    self.jobDB = jobDB if jobDB else JobDB()
    self.tqDB = tqDB if tqDB else TaskQueueDB()
    self.jlDB = jlDB if jlDB else JobLoggingDB()

    self.opsHelper = opsHelper if opsHelper else Operations()

    self.log = gLogger.getSubLogger( "Matcher" )

    # Built last: it needs the job DB and operations helper selected above
    self.limiter = Limiter( jobDB = self.jobDB, opsHelper = self.opsHelper )


  def selectJob( self, resourceDescription, credDict ):
    """ Main job selection function to find the highest priority job matching the resource capacity
    """

    startTime = time.time()

    resourceDict = self._getResourceDict( resourceDescription, credDict )

    negativeCond = self.limiter.getNegativeCondForSite( resourceDict['Site'] )
    result = self.tqDB.matchAndGetJob( resourceDict, negativeCond = negativeCond )

    if not result['OK']:
      return result
    result = result['Value']
    if not result['matchFound']:
      self.log.info( "No match found" )
      raise RuntimeError( "No match found" )

    jobID = result['jobId']
    resAtt = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup', 'Status'] )
    if not resAtt['OK']:
      raise RuntimeError( 'Could not retrieve job attributes' )
    if not resAtt['Value']:
      raise RuntimeError( "No attributes returned for job" )
    if not resAtt['Value']['Status'] == 'Waiting':
      self.log.error( 'Job matched by the TQ is not in Waiting state', str( jobID ) )
      result = self.tqDB.deleteJob( jobID )
      if not result[ 'OK' ]:
        return result
      raise RuntimeError( "Job %s is not in Waiting state" % str( jobID ) )

    self._reportStatus( resourceDict, jobID )

    result = self.jobDB.getJobJDL( jobID )
    if not result['OK']:
      raise RuntimeError( "Failed to get the job JDL" )

    resultDict = {}
    resultDict['JDL'] = result['Value']
    resultDict['JobID'] = jobID

    matchTime = time.time() - startTime
    self.log.info( "Match time: [%s]" % str( matchTime ) )
    gMonitor.addMark( "matchTime", matchTime )

    # Get some extra stuff into the response returned
    resOpt = self.jobDB.getJobOptParameters( jobID )
    if resOpt['OK']:
      for key, value in resOpt['Value'].items():
        resultDict[key] = value
    resAtt = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] )
    if not resAtt['OK']:
      raise RuntimeError( 'Could not retrieve job attributes' )
    if not resAtt['Value']:
      raise RuntimeError( 'No attributes returned for job' )

    if self.opsHelper.getValue( "JobScheduling/CheckMatchingDelay", True ):
      self.limiter.updateDelayCounters( resourceDict['Site'], jobID )

    pilotInfoReportedFlag = resourceDict.get( 'PilotInfoReportedFlag', False )
    if not pilotInfoReportedFlag:
      self._updatePilotInfo( resourceDict )
    self._updatePilotJobMapping( resourceDict, jobID )

    resultDict['DN'] = resAtt['Value']['OwnerDN']
    resultDict['Group'] = resAtt['Value']['OwnerGroup']
#.........這裏部分代碼省略.........
開發者ID:Andrew-McNab-UK,項目名稱:DIRAC,代碼行數:101,代碼來源:Matcher.py

示例9: JobCleaningAgent

class JobCleaningAgent( AgentModule ):
  """
      The specific agents must provide the following methods:
      - initialize() for initial settings
      - beginExecution()
      - execute() - the main method called in the agent cycle
      - endExecution()
      - finalize() - the graceful exit of the method, this one is usually used
                 for the agent restart
  """

  #############################################################################
  def initialize( self ):
    """ Set the agent defaults and create the database handles.

    Sets PollingTime to 60, creates the JobDB, TaskQueueDB and JobLoggingDB
    objects, determines the production job types excluded from cleaning and
    reads the MaxJobsAtOnce, JobByJob and ThrottlingPeriod options.

    :return: S_OK()
    """

    self.am_setOption( "PollingTime", 60 )
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )

    # The agent's own ProductionTypes option wins; the Operations value is the fallback
    agentTSTypes = self.am_getOption('ProductionTypes', [])
    if agentTSTypes:
      self.prod_types = agentTSTypes
    else:
      self.prod_types = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    # str.join replaces string.join(): the module-level function exists in
    # Python 2 only, the method works on both Python 2 and 3.
    gLogger.info( 'Will exclude the following Production types from cleaning %s' % ( ', '.join( self.prod_types ) ) )
    self.maxJobsAtOnce = self.am_getOption( 'MaxJobsAtOnce', 100 )
    self.jobByJob = self.am_getOption( 'JobByJob', True )
    self.throttlingPeriod = self.am_getOption( 'ThrottlingPeriod', 0. )
    return S_OK()

  def __getAllowedJobTypes( self ):
    """ Work out which job types may be cleaned.

    Asks the job DB for the distinct 'JobType' values and drops those
    listed in self.prod_types.

    :return: the failed job DB result unchanged, or S_OK( list of job types )
    """
    distinctTypes = self.jobDB.getDistinctJobAttributes( 'JobType' )
    if not distinctTypes[ 'OK' ]:
      return distinctTypes
    cleanJobTypes = [ jobType for jobType in distinctTypes[ 'Value' ] if jobType not in self.prod_types ]
    self.log.notice( "JobTypes to clean %s" % cleanJobTypes )
    return S_OK( cleanJobTypes )

  #############################################################################
  def execute( self ):
    """ Run one cleaning cycle.

    First removes the jobs in "Deleted" status, then, for every status in
    REMOVE_STATUS_DELAY, the jobs of a cleanable type that are older than
    the delay configured for that status.

    :return: the failed result if removing the "Deleted" jobs or getting
             the job types fails, otherwise S_OK(); a failure for an
             individual status is only logged as a warning.
    """
    # "Deleted" jobs are removed without passing any age cut-off
    deletedResult = self.removeJobsByStatus( { 'Status' : 'Deleted' } )
    if not deletedResult[ 'OK' ]:
      return deletedResult

    # Only these job types may be cleaned
    typesResult = self.__getAllowedJobTypes()
    if not typesResult[ 'OK' ]:
      return typesResult
    baseCond = { 'JobType' : typesResult[ 'Value' ] }

    # Remove jobs with final status
    for status in REMOVE_STATUS_DELAY:
      cutOff = str( Time.dateTime() - REMOVE_STATUS_DELAY[ status ] * Time.day )
      condDict = dict( baseCond, Status = status )
      removal = self.removeJobsByStatus( condDict, cutOff )
      if not removal['OK']:
        gLogger.warn( 'Failed to remove jobs in status %s' % status )
    return S_OK()

  def removeJobsByStatus( self, condDict, delay = False ):
    """ Remove deleted jobs
    """
    if delay:
      gLogger.verbose( "Removing jobs with %s and older than %s" % ( condDict, delay ) )
      result = self.jobDB.selectJobs( condDict, older = delay, limit = self.maxJobsAtOnce )
    else:
      gLogger.verbose( "Removing jobs with %s " % condDict )
      result = self.jobDB.selectJobs( condDict, limit = self.maxJobsAtOnce )

    if not result['OK']:
      return result

    jobList = result['Value']
    if len(jobList) > self.maxJobsAtOnce:
      jobList = jobList[:self.maxJobsAtOnce]
    if not jobList:
      return S_OK()

    self.log.notice( "Deleting %s jobs for %s" % ( len( jobList ), condDict ) )

    count = 0
    error_count = 0
    result = SandboxStoreClient( useCertificates = True ).unassignJobs( jobList )
    if not result[ 'OK' ]:
      gLogger.warn( "Cannot unassign jobs to sandboxes", result[ 'Message' ] )

      
    result = self.deleteJobOversizedSandbox( jobList ) 
    if not result[ 'OK' ]:
      gLogger.warn( "Cannot schedle removal of oversized sandboxes", result[ 'Message' ] )
      return result 
#.........這裏部分代碼省略.........
開發者ID:afalabel,項目名稱:DIRAC,代碼行數:101,代碼來源:JobCleaningAgent.py

示例10: OptimizerModule

class OptimizerModule(AgentModule):
  """
      The specific agents must provide the following methods:

        *  initialize() for initial settings
        *  beginExecution()
        *  execute() - the main method called in the agent cycle
        *  endExecution()
        *  finalize() - the graceful exit of the method, this one is usually used
                 for the agent restart
  """

  #############################################################################
  def __init__(self, *args, **kwargs):
    """ Forward all arguments to AgentModule and declare the optimizer attributes.
    """
    AgentModule.__init__(self, *args, **kwargs)

    # Database handles, assigned in initialize()
    self.jobDB = self.logDB = None

    # Job selection: major status is fixed here, minor status is set in initialize()
    self.startingMajorStatus = "Checking"
    self.startingMinorStatus = None

    self.failedStatus = None
    self.requiredJobInfo = 'jdl'

    # Holds the value returned by initializeOptimizer() in execute()
    self._initResult = None

  def initialize(self, jobDB=None, logDB=None):
    """ Prepare the optimizer agent.

    :param jobDB: job DB object to use; None means "create a JobDB()"
    :param logDB: logging DB object to use; None means "create a JobLoggingDB()"
    :return: whatever self.initializeOptimizer() returns
    """
    if jobDB is None:
      jobDB = JobDB()
    self.jobDB = jobDB
    # dExit(1) is called when the job DB reports itself as not valid
    if not self.jobDB.isValid():
      dExit(1)

    if logDB is None:
      logDB = JobLoggingDB()
    self.logDB = logDB

    # The optimizer name is the agent name without a trailing "Agent"
    suffix = 'Agent'
    agentName = self.am_getModuleParam('agentName')
    if agentName.endswith(suffix):
      agentName = agentName[:-len(suffix)]
    self.am_setModuleParam('optimizerName', agentName)

    # Read the name back from the module parameters rather than reusing the local
    self.startingMinorStatus = self.am_getModuleParam('optimizerName')
    self.failedStatus = self.am_getOption("FailedJobStatus", 'Failed')
    self.am_setOption("PollingTime", 30)

    return self.initializeOptimizer()

  def initializeOptimizer(self):
    """ Hook for optimizer-specific initialization; meant to be overridden.

    The base implementation does nothing.

    :return: S_OK()
    """
    return S_OK()

  #############################################################################
  def execute(self):
    """ Run one agent cycle over the jobs waiting for this optimizer.

    Calls initializeOptimizer(), selects the jobs matching the starting
    major (and, if set, minor) status and hands each one to optimizeJob().
    A job whose definition cannot be retrieved is marked as failed and
    skipped.

    :return: the failed initializeOptimizer() result, S_ERROR if the job
             selection fails, S_OK('No work to do') when nothing is
             selected, otherwise S_OK()
    """
    initResult = self.initializeOptimizer()
    if not initResult['OK']:
      return initResult
    self._initResult = initResult['Value']

    selection = {'Status': self.startingMajorStatus}
    if self.startingMinorStatus:
      selection['MinorStatus'] = self.startingMinorStatus

    selected = self.jobDB.selectJobs(selection)
    if not selected['OK']:
      errorMessage = 'Failed to get a job list from the JobDB'
      self.log.warn(errorMessage)
      return S_ERROR(errorMessage)

    jobList = selected['Value']
    if not jobList:
      self.log.verbose('No pending jobs to process')
      return S_OK('No work to do')

    for job in jobList:
      definition = self.getJobDefinition(job)
      if definition['OK']:
        # The outcome of optimizeJob() does not influence the cycle result
        self.optimizeJob(job, definition['Value']['classad'])
      else:
        self.setFailedJob(job, definition['Message'], '')

    return S_OK()

  #############################################################################
  def optimizeJob(self, job, classAdJob):
    """ Run checkJob() for one job and mark the job as failed on error.

    :param job: job identifier, passed on to checkJob() and setFailedJob()
    :param classAdJob: job description, passed on to checkJob() and setFailedJob()
    :return: the result of checkJob(), unchanged
    """
    self.log.info('Job %s will be processed by %sAgent' % (job, self.am_getModuleParam('optimizerName')))
    outcome = self.checkJob(job, classAdJob)
    if outcome['OK']:
      return outcome
    self.setFailedJob(job, outcome['Message'], classAdJob)
    return outcome

  #############################################################################
  def getJobDefinition(self, job, jobDef=False):
    """ Retrieve JDL of the Job and return jobDef dictionary
    """
    if not jobDef:
      jobDef = {}
    # If not jdl in jobinfo load it
#.........這裏部分代碼省略.........
開發者ID:DIRACGrid,項目名稱:DIRAC,代碼行數:101,代碼來源:OptimizerModule.py

示例11: StalledJobAgent

class StalledJobAgent( AgentModule ):
  """
The specific agents must provide the following methods:
- initialize() for initial settings
- beginExecution()
- execute() - the main method called in the agent cycle
- endExecution()
- finalize() - the graceful exit of the method, this one is usually used
for the agent restart
"""
  jobDB = None
  logDB = None
  matchedTime = 7200
  rescheduledTime = 600
  completedTime = 86400

  #############################################################################
  def initialize( self ):
    """ Create the database handles and set the default polling time.

    Only logs a message when the 'Enable' option is switched off.

    :return: S_OK()
    """
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()
    self.am_setOption( 'PollingTime', 3600 )

    enabled = self.am_getOption( 'Enable', True )
    if not enabled:
      self.log.info( 'Stalled Job Agent running in disabled mode' )

    return S_OK()

  #############################################################################
  def execute( self ):
    """ Run one agent cycle.

    Reads the timing options, derives the stalled/failed thresholds from
    the watchdog cycle and then runs, in this order: marking stalled jobs,
    failing stalled jobs, failing completed jobs and kicking stuck jobs.
    A failure in any of these steps is logged and does not stop the cycle.

    :return: S_ERROR if the WorkloadManagement system instance cannot be
             determined, otherwise S_OK( 'Stalled Job Agent cycle complete' )
    """
    self.log.verbose( 'Waking up Stalled Job Agent' )

    wmsInstance = getSystemInstance( 'WorkloadManagement' )
    if not wmsInstance:
      return S_ERROR( 'Can not get the WorkloadManagement system instance' )
    wrapperSection = cfgPath( 'Systems', 'WorkloadManagement', wmsInstance, 'JobWrapper' )

    # Both values are logged and used below as numbers of watchdog cycles
    stalledCycles = self.am_getOption( 'StalledTimeHours', 2 )
    failedCycles = self.am_getOption( 'FailedTimeHours', 6 )

    # The class-level values serve as defaults for these options
    self.matchedTime = self.am_getOption( 'MatchedTime', self.matchedTime )
    self.rescheduledTime = self.am_getOption( 'RescheduledTime', self.rescheduledTime )
    self.completedTime = self.am_getOption( 'CompletedTime', self.completedTime )

    self.log.verbose( 'StalledTime = %s cycles' % ( stalledCycles ) )
    self.log.verbose( 'FailedTime = %s cycles' % ( failedCycles ) )

    # The watchdog cycle is never taken shorter than MinCheckingTime
    checkingTime = gConfig.getValue( cfgPath( wrapperSection , 'CheckingTime' ), 30 * 60 )
    minCheckingTime = gConfig.getValue( cfgPath( wrapperSection , 'MinCheckingTime' ), 20 * 60 )
    watchdogCycle = max( checkingTime, minCheckingTime )

    # Add half cycle to avoid race conditions
    stalledThreshold = watchdogCycle * ( stalledCycles + 0.5 )
    failedThreshold = watchdogCycle * ( failedCycles + 0.5 )

    marked = self.__markStalledJobs( stalledThreshold )
    if not marked['OK']:
      self.log.error( 'Failed to detect stalled jobs', marked['Message'] )

    # Note, jobs will be revived automatically during the heartbeat signal phase and
    # subsequent status changes will result in jobs not being selected by the
    # stalled job agent.

    failedStalled = self.__failStalledJobs( failedThreshold )
    if not failedStalled['OK']:
      self.log.error( 'Failed to process stalled jobs', failedStalled['Message'] )

    failedCompleted = self.__failCompletedJobs()
    if not failedCompleted['OK']:
      self.log.error( 'Failed to process completed jobs', failedCompleted['Message'] )

    kicked = self.__kickStuckJobs()
    if not kicked['OK']:
      self.log.error( 'Failed to kick stuck jobs', kicked['Message'] )

    return S_OK( 'Stalled Job Agent cycle complete' )

  #############################################################################
  def __markStalledJobs( self, stalledTime ):
    """ Identifies stalled jobs running without update longer than stalledTime.
"""
    stalledCounter = 0
    runningCounter = 0
    result = self.jobDB.selectJobs( {'Status':'Running'} )
    if not result['OK']:
      return result
    if not result['Value']:
      return S_OK()
    jobs = result['Value']
    self.log.info( '%s Running jobs will be checked for being stalled' % ( len( jobs ) ) )
    jobs.sort()
# jobs = jobs[:10] #for debugging
    for job in jobs:
      result = self.__getStalledJob( job, stalledTime )
      if result['OK']:
        self.log.verbose( 'Updating status to Stalled for job %s' % ( job ) )
        self.__updateJobStatus( job, 'Stalled' )
        stalledCounter += 1
      else:
        self.log.verbose( result['Message'] )
#.........這裏部分代碼省略.........
開發者ID:DIRACGrid-test,項目名稱:DIRAC,代碼行數:101,代碼來源:StalledJobAgent.py

示例12: JobCleaningAgent

class JobCleaningAgent( AgentModule ):
  """
      The specific agents must provide the following methods:
      - initialize() for initial settings
      - beginExecution()
      - execute() - the main method called in the agent cycle
      - endExecution()
      - finalize() - the graceful exit of the method, this one is usually used
                 for the agent restart
  """

  #############################################################################
  def initialize( self ):
    """ Set the agent defaults and create the database handles.

    Sets PollingTime to 60, creates the JobDB, TaskQueueDB and JobLoggingDB
    objects and reads the ProductionTypes, MaxJobsAtOnce, JobByJob and
    ThrottlingPeriod options.

    :return: S_OK()
    """

    self.am_setOption( "PollingTime", 60 )
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )
    self.prod_types = self.am_getOption( 'ProductionTypes',
                                         ['DataReconstruction', 'DataStripping', 'MCSimulation', 'Merge', 'production'] )
    # str.join replaces string.join(): the module-level function exists in
    # Python 2 only, the method works on both Python 2 and 3.
    gLogger.info( 'Will exclude the following Production types from cleaning %s' % ( ', '.join( self.prod_types ) ) )
    self.maxJobsAtOnce = self.am_getOption( 'MaxJobsAtOnce', 200 )
    self.jobByJob = self.am_getOption( 'JobByJob', True )
    self.throttlingPeriod = self.am_getOption( 'ThrottlingPeriod', 0. )
    return S_OK()

  def __getAllowedJobTypes( self ):
    """ Work out which job types may be cleaned.

    Asks the job DB for the distinct 'JobType' values and keeps only those
    that are not listed in self.prod_types.

    :return: the failed job DB result unchanged, or S_OK( list of job types )
    """
    knownTypes = self.jobDB.getDistinctJobAttributes( 'JobType' )
    if not knownTypes[ 'OK' ]:
      return knownTypes
    cleanJobTypes = [ jobType for jobType in knownTypes[ 'Value' ] if jobType not in self.prod_types ]
    self.log.notice( "JobTypes to clean %s" % cleanJobTypes )
    return S_OK( cleanJobTypes )

  #############################################################################
  def execute( self ):
    """ Run one cleaning cycle.

    Jobs in "Deleted" status are removed first. Then, for each status in
    REMOVE_STATUS_DELAY, jobs of a cleanable type older than the delay
    configured for that status are removed.

    :return: the failed result if removing the "Deleted" jobs or getting
             the job types fails, otherwise S_OK(); a failure for an
             individual status is only logged as a warning.
    """
    # No age cut-off is passed for jobs in "Deleted" status
    outcome = self.removeJobsByStatus( { 'Status' : 'Deleted' } )
    if not outcome[ 'OK' ]:
      return outcome

    # Restrict everything below to the job types that may be cleaned
    allowedTypes = self.__getAllowedJobTypes()
    if not allowedTypes[ 'OK' ]:
      return allowedTypes
    baseCond = { 'JobType' : allowedTypes[ 'Value' ] }

    # Remove jobs with final status
    for status in REMOVE_STATUS_DELAY:
      olderThan = str( Time.dateTime() - REMOVE_STATUS_DELAY[ status ] * Time.day )
      condDict = dict( baseCond, Status = status )
      outcome = self.removeJobsByStatus( condDict, olderThan )
      if not outcome['OK']:
        gLogger.warn( 'Failed to remove jobs in status %s' % status )
    return S_OK()

  def removeJobsByStatus( self, condDict, delay = False ):
    """ Remove deleted jobs
    """
    if delay:
      gLogger.verbose( "Removing jobs with %s and older than %s" % ( condDict, delay ) )
      result = self.jobDB.selectJobs( condDict, older = delay, limit = self.maxJobsAtOnce )
    else:
      gLogger.verbose( "Removing jobs with %s " % condDict )
      result = self.jobDB.selectJobs( condDict, limit = self.maxJobsAtOnce )

    if not result['OK']:
      return result

    jobList = result['Value']
    if len(jobList) > self.maxJobsAtOnce:
      jobList = jobList[:self.maxJobsAtOnce]
    if not jobList:
      return S_OK()

    self.log.notice( "Deleting %s jobs for %s" % ( len( jobList ), condDict ) )

    count = 0
    error_count = 0
    result = SandboxStoreClient( useCertificates = True ).unassignJobs( jobList )
    if not result[ 'OK' ]:
      gLogger.warn( "Cannot unassign jobs to sandboxes", result[ 'Message' ] )

    if self.jobByJob:
      for jobID in jobList:
        resultJobDB = self.jobDB.removeJobFromDB( jobID )
        resultTQ = self.taskQueueDB.deleteJob( jobID )
        resultLogDB = self.jobLoggingDB.deleteJob( jobID )
        errorFlag = False
        if not resultJobDB['OK']:
          gLogger.warn( 'Failed to remove job %d from JobDB' % jobID, result['Message'] )
          errorFlag = True
#.........這裏部分代碼省略.........
開發者ID:hanyl,項目名稱:DIRAC,代碼行數:101,代碼來源:JobCleaningAgent.py

示例13: StalledJobAgent

class StalledJobAgent(AgentModule):
    """
The specific agents must provide the following methods:
- initialize() for initial settings
- beginExecution()
- execute() - the main method called in the agent cycle
- endExecution()
- finalize() - the graceful exit of the method, this one is usually used
for the agent restart
"""

    jobDB = None
    logDB = None
    matchedTime = 7200
    rescheduledTime = 600
    completedTime = 86400

    #############################################################################
    def initialize(self):
        """ Create the database handles and set the default polling time.

        Only logs a message when the "Enable" option is switched off.

        :return: S_OK()
        """
        self.jobDB = JobDB()
        self.logDB = JobLoggingDB()
        self.am_setOption("PollingTime", 3600)

        enabled = self.am_getOption("Enable", True)
        if not enabled:
            self.log.info("Stalled Job Agent running in disabled mode")

        return S_OK()

    #############################################################################
    def execute(self):
        """ Run one agent cycle.

        Reads the timing options, derives the stalled/failed thresholds from
        the watchdog cycle and then runs, in this order: marking stalled
        jobs, failing stalled jobs, failing completed jobs and kicking stuck
        jobs. A failure in any of these steps is logged and does not stop
        the cycle.

        :return: S_ERROR if the WorkloadManagement system instance cannot be
                 determined, otherwise S_OK("Stalled Job Agent cycle complete")
        """
        self.log.verbose("Waking up Stalled Job Agent")

        wmsInstance = getSystemInstance("WorkloadManagement")
        if not wmsInstance:
            return S_ERROR("Can not get the WorkloadManagement system instance")
        wrapperSection = cfgPath("Systems", "WorkloadManagement", wmsInstance, "JobWrapper")

        # Both values are logged and used below as numbers of watchdog cycles
        stalledCycles = self.am_getOption("StalledTimeHours", 2)
        failedCycles = self.am_getOption("FailedTimeHours", 6)

        # The class-level values serve as defaults for these options
        self.matchedTime = self.am_getOption("MatchedTime", self.matchedTime)
        self.rescheduledTime = self.am_getOption("RescheduledTime", self.rescheduledTime)
        self.completedTime = self.am_getOption("CompletedTime", self.completedTime)

        self.log.verbose("StalledTime = %s cycles" % (stalledCycles))
        self.log.verbose("FailedTime = %s cycles" % (failedCycles))

        # The watchdog cycle is never taken shorter than MinCheckingTime
        checkingTime = gConfig.getValue(cfgPath(wrapperSection, "CheckingTime"), 30 * 60)
        minCheckingTime = gConfig.getValue(cfgPath(wrapperSection, "MinCheckingTime"), 20 * 60)
        watchdogCycle = max(checkingTime, minCheckingTime)

        # Add half cycle to avoid race conditions
        stalledThreshold = watchdogCycle * (stalledCycles + 0.5)
        failedThreshold = watchdogCycle * (failedCycles + 0.5)

        marked = self.__markStalledJobs(stalledThreshold)
        if not marked["OK"]:
            self.log.error("Failed to detect stalled jobs", marked["Message"])

        # Note, jobs will be revived automatically during the heartbeat signal phase and
        # subsequent status changes will result in jobs not being selected by the
        # stalled job agent.

        failedStalled = self.__failStalledJobs(failedThreshold)
        if not failedStalled["OK"]:
            self.log.error("Failed to process stalled jobs", failedStalled["Message"])

        failedCompleted = self.__failCompletedJobs()
        if not failedCompleted["OK"]:
            self.log.error("Failed to process completed jobs", failedCompleted["Message"])

        kicked = self.__kickStuckJobs()
        if not kicked["OK"]:
            self.log.error("Failed to kick stuck jobs", kicked["Message"])

        return S_OK("Stalled Job Agent cycle complete")

    #############################################################################
    def __markStalledJobs(self, stalledTime):
        """ Identifies stalled jobs running without update longer than stalledTime.
"""
        stalledCounter = 0
        runningCounter = 0
        result = self.jobDB.selectJobs({"Status": "Running"})
        if not result["OK"]:
            return result
        if not result["Value"]:
            return S_OK()
        jobs = result["Value"]
        self.log.info("%s Running jobs will be checked for being stalled" % (len(jobs)))
        jobs.sort()
        # jobs = jobs[:10] #for debugging
        for job in jobs:
            result = self.__getStalledJob(job, stalledTime)
            if result["OK"]:
                self.log.verbose("Updating status to Stalled for job %s" % (job))
                self.__updateJobStatus(job, "Stalled")
                stalledCounter += 1
            else:
#.........這裏部分代碼省略.........
開發者ID:DIRACGrid,項目名稱:DIRAC,代碼行數:101,代碼來源:StalledJobAgent.py

示例14: JobCleaningAgent

class JobCleaningAgent( AgentModule ):
  """
      The specific agents must provide the following methods:

         *  initialize() for initial settings
         *  beginExecution()
         *  execute() - the main method called in the agent cycle
         *  endExecution()
         *  finalize() - the graceful exit of the method, this one is usually used for the agent restart
  """

  def __init__( self, *args, **kwargs ):
    """ Forward all arguments to AgentModule and declare the agent attributes.
    """
    AgentModule.__init__( self, *args, **kwargs )

    # Database handles, assigned in initialize()
    # FIXME: shouldn't we avoid using the DBs directly, and instead go through the service?
    self.jobDB = self.taskQueueDB = self.jobLoggingDB = None

    # Defaults; initialize() overwrites them from the agent options
    self.maxJobsAtOnce = 100
    self.jobByJob = False
    self.throttlingPeriod = 0.

    # Job types excluded from cleaning
    self.prodTypes = list()

    # Maps a job status to its removal delay, filled in initialize()
    self.removeStatusDelay = dict()

  #############################################################################
  def initialize( self ):
    """ Create the database handles and load the agent options.

    Sets PollingTime to 120, determines the production job types that are
    excluded from cleaning and fills self.removeStatusDelay with the
    per-status delays.

    :return: S_OK()
    """
    self.am_setOption( "PollingTime", 120 )

    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )

    # The agent's own ProductionTypes option takes precedence; the Operations
    # value is only looked up when that option is empty.
    configuredTypes = self.am_getOption('ProductionTypes', [])
    self.prodTypes = configuredTypes or Operations().getValue( 'Transformations/DataProcessing',
                                                                ['MCSimulation', 'Merge'] )
    excludedTypes = ', '.join( self.prodTypes )
    gLogger.info( "Will exclude the following Production types from cleaning %s" % excludedTypes )

    self.maxJobsAtOnce = self.am_getOption( 'MaxJobsAtOnce', 500 )
    self.jobByJob = self.am_getOption( 'JobByJob', False )
    self.throttlingPeriod = self.am_getOption('ThrottlingPeriod', 0.)

    # ( status, option name, default delay ) read in this order
    delayOptions = ( ( 'Done', 'RemoveStatusDelay/Done', 7 ),
                     ( 'Killed', 'RemoveStatusDelay/Killed', 7 ),
                     ( 'Failed', 'RemoveStatusDelay/Failed', 7 ),
                     ( 'Any', 'RemoveStatusDelay/Any', -1 ) )
    for status, optionName, defaultDelay in delayOptions:
      self.removeStatusDelay[status] = self.am_getOption( optionName, defaultDelay )

    return S_OK()

  def __getAllowedJobTypes( self ):
    """ Work out which job types may be cleaned.

    Asks the job DB for the distinct 'JobType' values and keeps only those
    that are not listed in self.prodTypes.

    :return: the failed job DB result unchanged, or S_OK( list of job types )
    """
    distinctTypes = self.jobDB.getDistinctJobAttributes( 'JobType' )
    if not distinctTypes[ 'OK' ]:
      return distinctTypes
    cleanJobTypes = [ jobType for jobType in distinctTypes[ 'Value' ] if jobType not in self.prodTypes ]
    self.log.notice( "JobTypes to clean %s" % cleanJobTypes )
    return S_OK( cleanJobTypes )

  #############################################################################
  def execute( self ):
    """ Remove jobs in various status
    """
    #Delete jobs in "Deleted" state
    result = self.removeJobsByStatus( { 'Status' : 'Deleted' } )
    if not result[ 'OK' ]:
      return result
    #Get all the Job types that can be cleaned
    result = self.__getAllowedJobTypes()
    if not result[ 'OK' ]:
      return result

    # No jobs in the system subject to removal
    if not result['Value']:
      return S_OK()

    baseCond = { 'JobType' : result[ 'Value' ] }
    # Remove jobs with final status
    for status in self.removeStatusDelay:
      delay = self.removeStatusDelay[ status ]
      if delay < 0:
        # Negative delay means don't delete anything...
        continue
      condDict = dict( baseCond )
      if status != 'Any':
        condDict[ 'Status' ] = status
      delTime = str( Time.dateTime() - delay * Time.day )
#.........這裏部分代碼省略.........
開發者ID:marianne013,項目名稱:DIRAC,代碼行數:101,代碼來源:JobCleaningAgent.py


注:本文中的DIRAC.WorkloadManagementSystem.DB.JobLoggingDB.JobLoggingDB類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。