本文整理汇总了Python中DIRAC.Core.Utilities.TimeLeft.TimeLeft.TimeLeft类的典型用法代码示例。如果您正苦于以下问题：Python TimeLeft类的具体用法？Python TimeLeft怎么用？Python TimeLeft使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了TimeLeft类的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_getScaledCPU
def test_getScaledCPU( self ):
    """ Check TimeLeft.getScaledCPU() without and with a batch system plugin.

    A fresh TimeLeft object is expected to report a scaled CPU of 0. Then,
    for each batch system, ``runCommand`` of the TimeLeft module is replaced
    by a mock returning the corresponding ``*_ReturnValue`` fixture, the
    plugin is attached to the TimeLeft object and the scaled CPU is compared
    with the expected value.
    """
    tl = TimeLeft()
    res = tl.getScaledCPU()
    self.assertEqual( res, 0 )

    tl.scaleFactor = 5.0
    tl.normFactor = 5.0

    # ( batch system, mocked runCommand output, expected scaled CPU )
    cases = [ ( 'LSF', LSF_ReturnValue, 0.0 ),
              ( 'SGE', SGE_ReturnValue, 300.0 ) ]
    for batch, retValue, expected in cases:
        self.tl = importlib.import_module( "DIRAC.Core.Utilities.TimeLeft.TimeLeft" )
        rcMock = MagicMock()
        rcMock.return_value = S_OK( retValue )
        self.tl.runCommand = rcMock

        batchSystemName = '%sTimeLeft' % batch
        batchPlugin = importlib.import_module( 'DIRAC.Core.Utilities.TimeLeft.%s' % batchSystemName )
        # Look the plugin class up by name rather than eval()-ing a built string
        tl.batchPlugin = getattr( batchPlugin, batchSystemName )()

        res = tl.getScaledCPU()
        self.assertEqual( res, expected )
示例2: __init__
def __init__(self, pid, exeThread, spObject, jobCPUTime, memoryLimit=0, processors=1, systemFlag='linux', jobArgs=None):
    """ Constructor, takes system flag as argument.

    :param pid: stored as ``wrapperPID``
    :param exeThread: execution thread object; ``getCurrentPID()`` is called on it to set ``appPID``
    :param spObject: stored as ``spObject``
    :param jobCPUTime: stored as ``jobCPUTime``
    :param memoryLimit: stored as ``memoryLimit``
    :param processors: stored as ``processors``
    :param systemFlag: stored as ``systemFlag``
    :param jobArgs: optional dict; the keys StopSigStartSeconds, StopSigFinishSeconds,
                    StopSigNumber and StopSigRegex are read from it when present
    """
    # A None default replaces the former shared mutable {} default
    if jobArgs is None:
        jobArgs = {}

    self.stopSigStartSeconds = int(jobArgs.get('StopSigStartSeconds', 1800))  # 30 minutes
    self.stopSigFinishSeconds = int(jobArgs.get('StopSigFinishSeconds', 1800))  # 30 minutes
    self.stopSigNumber = int(jobArgs.get('StopSigNumber', 2))  # SIGINT
    self.stopSigRegex = jobArgs.get('StopSigRegex', None)
    self.stopSigSent = False

    self.log = gLogger.getSubLogger("Watchdog")
    self.systemFlag = systemFlag
    self.exeThread = exeThread
    self.wrapperPID = pid
    self.appPID = self.exeThread.getCurrentPID()
    self.spObject = spObject
    self.jobCPUTime = jobCPUTime
    self.memoryLimit = memoryLimit
    self.calibration = 0
    self.initialValues = {}
    self.parameters = {}
    self.peekFailCount = 0
    self.peekRetry = 5
    self.processMonitor = ProcessMonitor()
    self.checkError = ''
    self.currentStats = {}
    self.initialized = False
    self.count = 0

    # defaults: test control flags
    self.testWallClock = 1
    self.testDiskSpace = 1
    self.testLoadAvg = 1
    self.testCPUConsumed = 1
    self.testCPULimit = 0
    self.testMemoryLimit = 0
    self.testTimeLeft = 1

    # defaults: other parameters
    self.pollingTime = 10  # 10 seconds
    self.checkingTime = 30 * 60  # 30 minute period
    self.minCheckingTime = 20 * 60  # 20 mins
    self.wallClockCheckSeconds = 5 * 60  # 5 minutes
    self.maxWallClockTime = 3 * 24 * 60 * 60  # 3 days
    self.jobPeekFlag = 1  # on / off
    self.minDiskSpace = 10  # MB
    self.loadAvgLimit = 1000  # > 1000 and jobs killed
    self.sampleCPUTime = 30 * 60  # 30 minutes
    self.jobCPUMargin = 20  # %age buffer before killing job
    self.minCPUWallClockRatio = 5  # ratio %age
    self.nullCPULimit = 5  # After 5 sample times return null CPU consumption kill job

    # counters and time left bookkeeping
    self.checkCount = 0
    self.wallClockCheckCount = 0
    self.nullCPUCount = 0
    self.grossTimeLeftLimit = 10 * self.checkingTime
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = 0
    self.littleTimeLeft = False
    self.scaleFactor = 1.0
    self.processors = processors
示例3: initialize
def initialize(self, loops=0):
    """Set the agent defaults and create the Computing Element instance.

    :param loops: value written to the agent option 'MaxCycles'
    :return: S_OK() on success; otherwise the failed result returned by the
             CE factory or by the CE description lookup
    """
    # Monitoring option is set to False, cycles are limited to `loops`
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE named in the local configuration replaces the agent option
    chosenCE = self.am_getOption('CEType', 'InProcess')
    configuredCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if configuredCE:
        self.log.info('Defining CE from local configuration = %s' % configuredCE)
        chosenCE = configuredCE

    # Create backend Computing Element
    factory = ComputingElementFactory()
    self.ceName = chosenCE
    creation = factory.getCE(chosenCE)
    if not creation['OK']:
        self.log.warn(creation['Message'])
        return creation
    self.computingElement = creation['Value']

    description = self.computingElement.getDescription()
    if not description['OK']:
        self.log.warn("Can not get the CE description")
        return description

    # CPUTime from the CE description is the fallback for the configured MaxCPUTime
    describedCPUTime = description['Value'].get('CPUTime', 0.0)
    self.timeLeft = describedCPUTime
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', describedCPUTime)

    self.initTimes = os.times()

    # Localsite options
    self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', 86400 * 5)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
    self.fillingMode = self.am_getOption('FillingModeFlag', False)
    self.minimumTimeLeft = self.am_getOption('MinimumTimeLeft', 1000)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)
    self.jobCount = 0
    self.matchFailedCount = 0
    self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', '')

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False

    return S_OK()
示例4: initialize
def initialize(self, loops=0):
    """Set the agent defaults and create the Computing Element instance.

    :param loops: value written to the agent option 'MaxCycles'
    :return: S_OK() on success; otherwise the failed result returned by the CE factory
    """
    # Monitoring option is set to False, cycles are limited to `loops`
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE named in the local configuration replaces the agent option
    chosenCE = self.am_getOption('CEType', 'InProcess')
    configuredCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if configuredCE:
        self.log.info('Defining CE from local configuration = %s' % configuredCE)
        chosenCE = configuredCE

    factory = ComputingElementFactory()
    self.ceName = chosenCE
    creation = factory.getCE(chosenCE)
    if not creation['OK']:
        self.log.warn(creation['Message'])
        return creation

    self.initTimes = os.times()
    self.computingElement = creation['Value']

    # Four directory levels above this file
    rootDir = __file__
    for _ in range(4):
        rootDir = os.path.dirname(rootDir)
    self.diracRoot = rootDir

    # Localsite options
    self.siteRoot = gConfig.getValue('/LocalSite/Root', os.getcwd())
    self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', 86400 * 5)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
    # The job wrapper template path is resolved relative to diracRoot
    wrapperLocation = self.am_getOption('JobWrapperTemplate',
                                        'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')
    self.jobWrapperTemplate = os.path.join(self.diracRoot, wrapperLocation)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
    self.defaultLogLevel = self.am_getOption('DefaultLogLevel', 'info')
    self.fillingMode = self.am_getOption('FillingModeFlag', False)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)
    self.jobCount = 0
    self.matchFailedCount = 0

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
    self.gridCEQueue = gConfig.getValue('/Resources/Computing/CEDefaults/GridCEQueue', '')
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False

    return S_OK()
示例5: Watchdog
class Watchdog( object ):
#############################################################################
def __init__( self, pid, exeThread, spObject, jobCPUtime, memoryLimit = 0, systemFlag = 'linux2.4' ):
    """ Constructor, takes system flag as argument.

    :param pid: stored as ``wrapperPID``
    :param exeThread: execution thread object; ``getCurrentPID()`` is called on it to set ``appPID``
    :param spObject: stored as ``spObject``
    :param jobCPUtime: stored as ``jobCPUtime``
    :param memoryLimit: stored as ``memoryLimit``
    :param systemFlag: stored as ``systemFlag``
    """
    self.log = gLogger.getSubLogger( "Watchdog" )
    self.systemFlag = systemFlag
    self.exeThread = exeThread
    self.wrapperPID = pid
    self.appPID = self.exeThread.getCurrentPID()
    self.spObject = spObject
    self.jobCPUtime = jobCPUtime
    self.memoryLimit = memoryLimit
    self.calibration = 0
    self.initialValues = {}
    self.parameters = {}
    self.peekFailCount = 0
    self.peekRetry = 5
    self.processMonitor = ProcessMonitor()
    self.checkError = ''
    self.currentStats = {}
    self.initialized = False
    self.count = 0

    # defaults: test control flags
    self.testWallClock = 1
    self.testDiskSpace = 1
    self.testLoadAvg = 1
    self.testCPUConsumed = 1
    self.testCPULimit = 0
    self.testMemoryLimit = 0
    self.testTimeLeft = 1

    # defaults: other parameters
    self.pollingTime = 10 # 10 seconds
    self.checkingTime = 30 * 60 # 30 minute period
    self.minCheckingTime = 20 * 60 # 20 mins
    self.maxWallClockTime = 3 * 24 * 60 * 60 # 3 days
    self.jobPeekFlag = 1 # on / off
    self.minDiskSpace = 10 # MB
    self.loadAvgLimit = 1000 # > 1000 and jobs killed
    self.sampleCPUTime = 30 * 60 # 30 minutes
    self.jobCPUMargin = 20 # %age buffer before killing job
    self.minCPUWallClockRatio = 5 # ratio %age
    self.nullCPULimit = 5 # After 5 sample times return null CPU consumption kill job

    # counters and time left bookkeeping
    self.checkCount = 0
    self.nullCPUCount = 0
    self.grossTimeLeftLimit = 10 * self.checkingTime
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = 0
    self.littleTimeLeft = False
#############################################################################
def initialize( self, loops = 0 ):
""" Watchdog initialization.
"""
if self.initialized:
self.log.info( 'Watchdog already initialized' )
return S_OK()
else:
self.initialized = True
setup = gConfig.getValue( '/DIRAC/Setup', '' )
if not setup:
return S_ERROR( 'Can not get the DIRAC Setup value' )
wms_instance = getSystemInstance( "WorkloadManagement" )
if not wms_instance:
return S_ERROR( 'Can not get the WorkloadManagement system instance' )
self.section = '/Systems/WorkloadManagement/%s/JobWrapper' % wms_instance
self.maxcount = loops
self.log.verbose( 'Watchdog initialization' )
self.log.info( 'Attempting to Initialize Watchdog for: %s' % ( self.systemFlag ) )
# Test control flags
self.testWallClock = gConfig.getValue( self.section + '/CheckWallClockFlag', 1 )
self.testDiskSpace = gConfig.getValue( self.section + '/CheckDiskSpaceFlag', 1 )
self.testLoadAvg = gConfig.getValue( self.section + '/CheckLoadAvgFlag', 1 )
self.testCPUConsumed = gConfig.getValue( self.section + '/CheckCPUConsumedFlag', 1 )
self.testCPULimit = gConfig.getValue( self.section + '/CheckCPULimitFlag', 0 )
self.testMemoryLimit = gConfig.getValue( self.section + '/CheckMemoryLimitFlag', 0 )
self.testTimeLeft = gConfig.getValue( self.section + '/CheckTimeLeftFlag', 1 )
# Other parameters
self.pollingTime = gConfig.getValue( self.section + '/PollingTime', 10 ) # 10 seconds
self.checkingTime = gConfig.getValue( self.section + '/CheckingTime', 30 * 60 ) # 30 minute period
self.minCheckingTime = gConfig.getValue( self.section + '/MinCheckingTime', 20 * 60 ) # 20 mins
self.maxWallClockTime = gConfig.getValue( self.section + '/MaxWallClockTime', 3 * 24 * 60 * 60 ) # e.g. 4 days
self.jobPeekFlag = gConfig.getValue( self.section + '/JobPeekFlag', 1 ) # on / off
self.minDiskSpace = gConfig.getValue( self.section + '/MinDiskSpace', 10 ) # MB
self.loadAvgLimit = gConfig.getValue( self.section + '/LoadAverageLimit', 1000 ) # > 1000 and jobs killed
self.sampleCPUTime = gConfig.getValue( self.section + '/CPUSampleTime', 30 * 60 ) # e.g. up to 20mins sample
self.jobCPUMargin = gConfig.getValue( self.section + '/JobCPULimitMargin', 20 ) # %age buffer before killing job
self.minCPUWallClockRatio = gConfig.getValue( self.section + '/MinCPUWallClockRatio', 5 ) # ratio %age
self.nullCPULimit = gConfig.getValue( self.section + '/NullCPUCountLimit', 5 ) # After 5 sample times return null CPU consumption kill job
if self.checkingTime < self.minCheckingTime:
self.log.info( 'Requested CheckingTime of %s setting to %s seconds (minimum)' % ( self.checkingTime, self.minCheckingTime ) )
self.checkingTime = self.minCheckingTime
# The time left is returned in seconds @ 250 SI00 = 1 HS06,
#.........这里部分代码省略.........
示例6: JobAgent
class JobAgent( AgentModule ):
""" This agent is what runs in a worker node. The pilot runs it, after having prepared its configuration.
"""
#############################################################################
def initialize(self, loops=0):
    """Set the agent defaults and create the Computing Element instance.

    :param loops: value written to the agent option 'MaxCycles'
    :return: S_OK() on success; otherwise the failed result returned by the CE factory
    """
    # Monitoring option is set to False, cycles are limited to `loops`
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE named in the local configuration replaces the agent option
    chosenCE = self.am_getOption('CEType', 'InProcess')
    configuredCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if configuredCE:
        self.log.info('Defining CE from local configuration = %s' % configuredCE)
        chosenCE = configuredCE

    factory = ComputingElementFactory()
    self.ceName = chosenCE
    creation = factory.getCE(chosenCE)
    if not creation['OK']:
        self.log.warn(creation['Message'])
        return creation

    self.initTimes = os.times()
    self.computingElement = creation['Value']

    # Localsite options
    self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', 86400 * 5)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
    self.fillingMode = self.am_getOption('FillingModeFlag', False)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)
    self.jobCount = 0
    self.matchFailedCount = 0
    self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', '')

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False

    return S_OK()
#############################################################################
def execute( self ):
"""The JobAgent execution method.
"""
if self.jobCount:
#Only call timeLeft utility after a job has been picked up
self.log.info( 'Attempting to check CPU time left for filling mode' )
if self.fillingMode:
if self.timeLeftError:
self.log.warn( self.timeLeftError )
return self.__finish( self.timeLeftError )
self.log.info( '%s normalized CPU units remaining in slot' % ( self.timeLeft ) )
# Need to update the Configuration so that the new value is published in the next matching request
result = self.computingElement.setCPUTimeLeft( cpuTimeLeft = self.timeLeft )
if not result['OK']:
return self.__finish( result['Message'] )
# Update local configuration to be used by submitted job wrappers
localCfg = CFG()
if self.extraOptions:
localConfigFile = os.path.join( '.', self.extraOptions )
else:
localConfigFile = os.path.join( rootPath, "etc", "dirac.cfg" )
localCfg.loadFromFile( localConfigFile )
if not localCfg.isSection('/LocalSite'):
localCfg.createNewSection('/LocalSite')
localCfg.setOption( '/LocalSite/CPUTimeLeft', self.timeLeft )
localCfg.writeToFile( localConfigFile )
else:
return self.__finish( 'Filling Mode is Disabled' )
self.log.verbose( 'Job Agent execution loop' )
available = self.computingElement.available()
if not available['OK'] or not available['Value']:
self.log.info( 'Resource is not available' )
self.log.info( available['Message'] )
return self.__finish( 'CE Not Available' )
self.log.info( available['Message'] )
result = self.computingElement.getDescription()
if not result['OK']:
return result
ceDict = result['Value']
# Add pilot information
gridCE = gConfig.getValue( 'LocalSite/GridCE', 'Unknown' )
if gridCE != 'Unknown':
#.........这里部分代码省略.........
示例7: JobAgent
class JobAgent( AgentModule ):
"""
The specific agents must provide the following methods:
- initialize() for initial settings
- beginExecution()
- execute() - the main method called in the agent cycle
- endExecution()
- finalize() - the graceful exit of the method, this one is usually used
for the agent restart
"""
#############################################################################
def initialize(self, loops=0):
    """Set the agent defaults and create the Computing Element instance.

    :param loops: value written to the agent option 'MaxCycles'
    :return: S_OK() on success; otherwise the failed result returned by the CE factory
    """
    # Monitoring option is set to False, cycles are limited to `loops`
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE named in the local configuration replaces the agent option
    chosenCE = self.am_getOption('CEUniqueID', 'InProcess')
    configuredCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if configuredCE:
        self.log.info('Defining CE from local configuration = %s' % configuredCE)
        chosenCE = configuredCE

    factory = ComputingElementFactory()
    self.ceName = chosenCE
    creation = factory.getCE(chosenCE)
    if not creation['OK']:
        self.log.warn(creation['Message'])
        return creation
    self.computingElement = creation['Value']

    # Four directory levels above this file
    rootDir = __file__
    for _ in range(4):
        rootDir = os.path.dirname(rootDir)
    self.diracRoot = rootDir

    # Localsite options
    self.siteRoot = gConfig.getValue('/LocalSite/Root', os.getcwd())
    self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', 86400 * 5)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
    # The job wrapper template path is resolved relative to diracRoot
    wrapperLocation = self.am_getOption('JobWrapperTemplate',
                                        'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')
    self.jobWrapperTemplate = os.path.join(self.diracRoot, wrapperLocation)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
    self.defaultLogLevel = self.am_getOption('DefaultLogLevel', 'info')
    self.fillingMode = self.am_getOption('FillingModeFlag', False)
    self.jobCount = 0

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
    self.gridCEQueue = gConfig.getValue('/Resources/Computing/CEDefaults/GridCEQueue', '')
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0

    return S_OK()
#############################################################################
def execute( self ):
"""The JobAgent execution method.
"""
if self.jobCount:
#Only call timeLeft utility after a job has been picked up
self.log.info( 'Attempting to check CPU time left for filling mode' )
if self.fillingMode:
if self.timeLeftError:
self.log.warn( self.timeLeftError )
return self.__finish( self.timeLeftError )
self.log.info( '%s normalized CPU units remaining in slot' % ( self.timeLeft ) )
# Need to update the Configuration so that the new value is published in the next matching request
result = self.computingElement.setCPUTimeLeft( cpuTimeLeft = self.timeLeft )
if not result['OK']:
return self.__finish( result['Message'] )
ceJDL = self.computingElement.getJDL()
resourceJDL = ceJDL['Value']
else:
return self.__finish( 'Filling Mode is Disabled' )
self.log.verbose( 'Job Agent execution loop' )
available = self.computingElement.available()
if not available['OK'] or not available['Value']:
self.log.info( 'Resource is not available' )
self.log.info( available['Message'] )
return self.__finish( 'CE Not Available' )
self.log.info( available['Message'] )
ceJDL = self.computingElement.getJDL()
resourceJDL = ceJDL['Value']
self.log.verbose( resourceJDL )
start = time.time()
jobRequest = self.__requestJob( resourceJDL )
matchTime = time.time() - start
self.log.info( 'MatcherTime = %.2f (s)' % ( matchTime ) )
if not jobRequest['OK']:
if re.search( 'No work available', jobRequest['Message'] ):
self.log.info( 'Job request OK: %s' % ( jobRequest['Message'] ) )
#.........这里部分代码省略.........
示例8: JobAgent
class JobAgent(AgentModule):
""" This agent is what runs in a worker node. The pilot runs it, after having prepared its configuration.
"""
def __init__(self, agentName, loadName, baseAgentName=False, properties=None):
    """ Set the default value of every agent attribute.

    :param agentName: forwarded to the AgentModule constructor
    :param loadName: forwarded to the AgentModule constructor
    :param baseAgentName: forwarded to the AgentModule constructor
    :param properties: forwarded to the AgentModule constructor; any falsy value
                       is replaced by a new empty dict
    """
    properties = properties or {}
    super(JobAgent, self).__init__(agentName, loadName, baseAgentName, properties)

    # Computing Element and timing
    self.ceName = 'InProcess'
    self.computingElement = None
    self.timeLeft = 0.0
    self.initTimes = os.times()

    # Localsite options
    self.siteName = 'Unknown'
    self.pilotReference = 'Unknown'
    self.defaultProxyLength = 86400 * 5

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = 0.0
    self.jobSubmissionDelay = 10
    self.fillingMode = False
    self.minimumTimeLeft = 1000
    self.stopOnApplicationFailure = True
    self.stopAfterFailedMatches = 10
    self.jobCount = 0
    self.matchFailedCount = 0
    self.extraOptions = ''

    # Timeleft
    self.timeLeftUtil = None
    self.timeLeftError = ''
    self.pilotInfoReportedFlag = False
#############################################################################
def initialize(self, loops=0):
    """Read the agent configuration and create the Computing Element instance.

    Every option falls back to the value the attribute already holds.

    :param loops: value written to the agent option 'MaxCycles'
    :return: S_OK() on success; otherwise the failed result returned by the
             CE factory or by the CE description lookup
    """
    # Monitoring option is set to False, cycles are limited to `loops`
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    # A CE named in the local configuration replaces the agent option
    chosenCE = self.am_getOption('CEType', 'InProcess')
    configuredCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if configuredCE:
        self.log.info('Defining CE from local configuration = %s' % configuredCE)
        chosenCE = configuredCE

    # Create backend Computing Element
    factory = ComputingElementFactory()
    self.ceName = chosenCE
    creation = factory.getCE(chosenCE)
    if not creation['OK']:
        self.log.warn(creation['Message'])
        return creation
    self.computingElement = creation['Value']

    description = self.computingElement.getDescription()
    if not description['OK']:
        self.log.warn("Can not get the CE description")
        return description

    # CPUTime from the CE description is the fallback for the configured MaxCPUTime
    self.timeLeft = description['Value'].get('CPUTime', self.timeLeft)
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', self.timeLeft)

    self.initTimes = os.times()

    # Localsite options
    self.siteName = gConfig.getValue('/LocalSite/Site', self.siteName)
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', self.pilotReference)
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', self.defaultProxyLength)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', self.cpuFactor)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', self.jobSubmissionDelay)
    self.fillingMode = self.am_getOption('FillingModeFlag', self.fillingMode)
    self.minimumTimeLeft = self.am_getOption('MinimumTimeLeft', self.minimumTimeLeft)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', self.stopOnApplicationFailure)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)
    self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', self.extraOptions)

    # Timeleft
    self.timeLeftUtil = TimeLeft()

    return S_OK()
#############################################################################
def execute(self):
"""The JobAgent execution method.
"""
if self.jobCount:
# Temporary mechanism to pass a shutdown message to the agent
if os.path.exists('/var/lib/dirac_drain'):
return self.__finish('Node is being drained by an operator')
# Only call timeLeft utility after a job has been picked up
self.log.info('Attempting to check CPU time left for filling mode')
if self.fillingMode:
if self.timeLeftError:
self.log.warn(self.timeLeftError)
return self.__finish(self.timeLeftError)
self.log.info('%s normalized CPU units remaining in slot' % (self.timeLeft))
#.........这里部分代码省略.........
示例9: JobAgent
class JobAgent(AgentModule):
"""
The specific agents must provide the following methods:
- initialize() for initial settings
- beginExecution()
- execute() - the main method called in the agent cycle
- endExecution()
- finalize() - the graceful exit of the method, this one is usually used
for the agent restart
"""
#############################################################################
def initialize(self, loops=0):
    """Set the agent defaults and create the Computing Element instance.

    :param loops: value written to the agent option 'MaxCycles'
    :return: S_OK() on success; otherwise the failed result returned by the CE factory
    """
    # Monitoring option is set to False, cycles are limited to `loops`
    self.am_setOption("MonitoringEnabled", False)
    self.am_setOption("MaxCycles", loops)

    # A CE named in the local configuration replaces the agent option
    chosenCE = self.am_getOption("CEType", "InProcess")
    configuredCE = gConfig.getValue("/LocalSite/LocalCE", "")
    if configuredCE:
        self.log.info("Defining CE from local configuration = %s" % configuredCE)
        chosenCE = configuredCE

    factory = ComputingElementFactory()
    self.ceName = chosenCE
    creation = factory.getCE(chosenCE)
    if not creation["OK"]:
        self.log.warn(creation["Message"])
        return creation

    self.initTimes = os.times()
    self.computingElement = creation["Value"]

    # Four directory levels above this file
    rootDir = __file__
    for _ in range(4):
        rootDir = os.path.dirname(rootDir)
    self.diracRoot = rootDir

    # Localsite options
    self.siteRoot = gConfig.getValue("/LocalSite/Root", os.getcwd())
    self.siteName = gConfig.getValue("/LocalSite/Site", "Unknown")
    self.pilotReference = gConfig.getValue("/LocalSite/PilotReference", "Unknown")
    self.defaultProxyLength = gConfig.getValue("/Registry/DefaultProxyLifeTime", 86400 * 5)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue("/LocalSite/CPUNormalizationFactor", 0.0)
    # The job wrapper template path is resolved relative to diracRoot
    wrapperLocation = self.am_getOption(
        "JobWrapperTemplate", "DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py"
    )
    self.jobWrapperTemplate = os.path.join(self.diracRoot, wrapperLocation)
    self.jobSubmissionDelay = self.am_getOption("SubmissionDelay", 10)
    self.defaultLogLevel = self.am_getOption("DefaultLogLevel", "info")
    self.fillingMode = self.am_getOption("FillingModeFlag", False)
    self.stopOnApplicationFailure = self.am_getOption("StopOnApplicationFailure", True)
    self.stopAfterFailedMatches = self.am_getOption("StopAfterFailedMatches", 10)
    self.jobCount = 0
    self.matchFailedCount = 0

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue("/Resources/Computing/CEDefaults/MaxCPUTime", 0.0)
    self.gridCEQueue = gConfig.getValue("/Resources/Computing/CEDefaults/GridCEQueue", "")
    self.timeLeftError = ""
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False

    return S_OK()
#############################################################################
def execute(self):
"""The JobAgent execution method.
"""
if self.jobCount:
# Only call timeLeft utility after a job has been picked up
self.log.info("Attempting to check CPU time left for filling mode")
if self.fillingMode:
if self.timeLeftError:
self.log.warn(self.timeLeftError)
return self.__finish(self.timeLeftError)
self.log.info("%s normalized CPU units remaining in slot" % (self.timeLeft))
# Need to update the Configuration so that the new value is published in the next matching request
result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
if not result["OK"]:
return self.__finish(result["Message"])
else:
return self.__finish("Filling Mode is Disabled")
self.log.verbose("Job Agent execution loop")
available = self.computingElement.available()
if not available["OK"] or not available["Value"]:
self.log.info("Resource is not available")
self.log.info(available["Message"])
return self.__finish("CE Not Available")
self.log.info(available["Message"])
result = self.computingElement.getDescription()
if not result["OK"]:
return result
ceDict = result["Value"]
# Add pilot information
gridCE = gConfig.getValue("LocalSite/GridCE", "Unknown")
#.........这里部分代码省略.........
示例10: JobAgent
class JobAgent(AgentModule):
""" This agent is what runs in a worker node. The pilot runs it, after having prepared its configuration.
"""
#############################################################################
def initialize(self, loops=0):
    """Set the agent defaults and create the Computing Element instance.

    :param loops: value written to the agent option 'MaxCycles'
    :return: S_OK() on success; otherwise the failed result returned by the
             CE factory or by the CE description lookup
    """
    # Monitoring option is set to False, cycles are limited to `loops`
    self.am_setOption("MonitoringEnabled", False)
    self.am_setOption("MaxCycles", loops)

    # A CE named in the local configuration replaces the agent option
    chosenCE = self.am_getOption("CEType", "InProcess")
    configuredCE = gConfig.getValue("/LocalSite/LocalCE", "")
    if configuredCE:
        self.log.info("Defining CE from local configuration = %s" % configuredCE)
        chosenCE = configuredCE

    # Create backend Computing Element
    factory = ComputingElementFactory()
    self.ceName = chosenCE
    creation = factory.getCE(chosenCE)
    if not creation["OK"]:
        self.log.warn(creation["Message"])
        return creation
    self.computingElement = creation["Value"]

    description = self.computingElement.getDescription()
    if not description["OK"]:
        self.log.warn("Can not get the CE description")
        return description

    # CPUTime from the CE description is the fallback for the configured MaxCPUTime
    describedCPUTime = description["Value"].get("CPUTime", 0.0)
    self.timeLeft = describedCPUTime
    self.timeLeft = gConfig.getValue("/Resources/Computing/CEDefaults/MaxCPUTime", describedCPUTime)

    self.initTimes = os.times()

    # Localsite options
    self.siteName = gConfig.getValue("/LocalSite/Site", "Unknown")
    self.pilotReference = gConfig.getValue("/LocalSite/PilotReference", "Unknown")
    self.defaultProxyLength = gConfig.getValue("/Registry/DefaultProxyLifeTime", 86400 * 5)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue("/LocalSite/CPUNormalizationFactor", 0.0)
    self.jobSubmissionDelay = self.am_getOption("SubmissionDelay", 10)
    self.fillingMode = self.am_getOption("FillingModeFlag", False)
    self.minimumTimeLeft = self.am_getOption("MinimumTimeLeft", 1000)
    self.stopOnApplicationFailure = self.am_getOption("StopOnApplicationFailure", True)
    self.stopAfterFailedMatches = self.am_getOption("StopAfterFailedMatches", 10)
    self.jobCount = 0
    self.matchFailedCount = 0
    self.extraOptions = gConfig.getValue("/AgentJobRequirements/ExtraOptions", "")

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeftError = ""
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False

    return S_OK()
#############################################################################
def execute(self):
"""The JobAgent execution method.
"""
if self.jobCount:
# Only call timeLeft utility after a job has been picked up
self.log.info("Attempting to check CPU time left for filling mode")
if self.fillingMode:
if self.timeLeftError:
self.log.warn(self.timeLeftError)
return self.__finish(self.timeLeftError)
self.log.info("%s normalized CPU units remaining in slot" % (self.timeLeft))
if self.timeLeft <= self.minimumTimeLeft:
return self.__finish("No more time left")
# Need to update the Configuration so that the new value is published in the next matching request
result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
if not result["OK"]:
return self.__finish(result["Message"])
# Update local configuration to be used by submitted job wrappers
localCfg = CFG()
if self.extraOptions:
localConfigFile = os.path.join(".", self.extraOptions)
else:
localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
localCfg.loadFromFile(localConfigFile)
if not localCfg.isSection("/LocalSite"):
localCfg.createNewSection("/LocalSite")
localCfg.setOption("/LocalSite/CPUTimeLeft", self.timeLeft)
localCfg.writeToFile(localConfigFile)
else:
return self.__finish("Filling Mode is Disabled")
self.log.verbose("Job Agent execution loop")
available = self.computingElement.available()
if not available["OK"] or not available["Value"]:
self.log.info("Resource is not available")
self.log.info(available["Message"])
return self.__finish("CE Not Available")
#.........这里部分代码省略.........