本文整理汇总了Python中WMCore.BossAir.BossAirAPI.BossAirAPI.kill方法的典型用法代码示例。如果您正苦于以下问题：Python BossAirAPI.kill方法的具体用法？Python BossAirAPI.kill怎么用？Python BossAirAPI.kill使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类WMCore.BossAir.BossAirAPI.BossAirAPI的用法示例。
在下文中一共展示了BossAirAPI.kill方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: changeSiteState
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def changeSiteState(self, siteName, state):
    """
    _changeSiteState_

    Put ``siteName`` into ``state``.

    When there are jobs in the 'executing' state, they are first handed to
    BossAirAPI.updateSiteInformation() and whatever it returns is killed
    with an exit code derived from the new state (71301 Aborted,
    71302 Draining, 71303 Down, 71300 for anything else). The site state
    itself is written to the database last.
    """
    exitCodeByState = {"Aborted": 71301,
                       "Draining": 71302,
                       "Down": 71303}

    jobLister = self.wmbsDAOFactory(classname="Jobs.ListByState")
    runningJobs = jobLister.execute(state='executing')
    if runningJobs:
        batchApi = BossAirAPI(self.config, noSetup=True)
        stateHasExitCode = state in exitCodeByState
        doomedJobs = batchApi.updateSiteInformation(runningJobs, siteName, stateHasExitCode)
        batchApi.kill(doomedJobs, errorCode=exitCodeByState.get(state, 71300))

    # the plugin has dealt with the jobs by now, so the site state can flip
    stateSetter = self.wmbsDAOFactory(classname="Locations.SetState")
    setArgs = {"siteName": siteName,
               "state": state,
               "conn": self.getDBConn(),
               "transaction": self.existingTransaction()}
    stateSetter.execute(**setArgs)
    return
示例2: changeSiteState
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def changeSiteState(self, siteName, state):
    """
    _changeSiteState_

    Write the new ``state`` of ``siteName`` to the database, then, if any
    jobs are in the 'executing' state, pass them through
    BossAirAPI.updateSiteInformation() and kill whatever it returns.

    The exit code sent with the kill depends on the state: 71301 for
    Aborted, 71302 for Draining, 71303 for Down and 71300 otherwise.
    """
    exitCodes = {"Aborted": 71301, "Draining": 71302, "Down": 71303}

    stateSetter = self.wmbsDAOFactory(classname="Locations.SetState")
    stateSetter.execute(siteName=siteName,
                        state=state,
                        conn=self.getDBConn(),
                        transaction=self.existingTransaction())

    jobLister = self.wmbsDAOFactory(classname="Jobs.ListByState")
    runningJobs = jobLister.execute(state='executing')
    if runningJobs:
        batchApi = BossAirAPI(self.config, noSetup=True)
        doomedJobs = batchApi.updateSiteInformation(runningJobs, siteName, state in exitCodes)
        batchApi.kill(doomedJobs, errorCode=exitCodes.get(state, 71300))
    # nothing executing means there is nothing left to look at
    return
示例3: killWorkflow
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig = None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    :param workflowName: name of the workflow whose files and jobs are killed
    :param jobCouchConfig: configuration object handed to ChangeState
    :param bossAirConfig: optional BossAir configuration; when it is truthy
        the jobs in state 'executing' are also killed through BossAirAPI

    If the calling thread has no open transaction, one is begun here and
    committed before returning; a transaction opened by the caller is left
    for the caller to finish.
    """
    myThread = threading.currentThread()
    daoFactory = DAOFactory(package = "WMCore.WMBS",
                            logger = myThread.logger,
                            dbinterface = myThread.dbi)
    killFilesAction = daoFactory(classname = "Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname = "Jobs.KillWorkflow")

    # Remember who owns the transaction so we only commit our own
    existingTransaction = bool(myThread.transaction.conn)
    if not existingTransaction:
        myThread.transaction.begin()

    killFilesAction.execute(workflowName = workflowName,
                            conn = myThread.transaction.conn,
                            transaction = True)
    liveJobs = killJobsAction.execute(workflowName = workflowName,
                                      conn = myThread.transaction.conn,
                                      transaction = True)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config = bossAirConfig, noSetup = True)
        killableJobs = []
        for liveJob in liveJobs:
            if liveJob["state"].lower() == 'executing':
                # Then we need to kill this on the batch system
                liveWMBSJob = Job(id = liveJob["id"])
                liveWMBSJob.update(liveJob)
                changeState.propagate(liveWMBSJob, "killed", liveJob["state"])
                killableJobs.append(liveJob)
        # Now kill them
        try:
            bossAir.kill(jobs = killableJobs)
        except BossAirException as ex:
            # Something's gone wrong
            # Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.

    # A transaction begun above must not be left dangling on the thread
    if not existingTransaction:
        myThread.transaction.commit()
    return
示例4: testF_WMSMode
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def testF_WMSMode(self):
    """
    _WMSMode_

    Configure BossAir for the PyCondorPlugin with WMS mode switched on,
    submit a set of job groups through JobSubmitterPoller, check that every
    job is reported by condor and finally kill the jobs BossAir lists
    as 'Idle'.
    """
    # The test only makes sense on an empty queue
    alreadyRunning = getCondorRunningJobs(self.user)
    self.assertEqual(alreadyRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (alreadyRunning))

    cfg = self.getConfig()
    cfg.BossAir.pluginName = 'PyCondorPlugin'
    cfg.BossAir.submitWMSMode = True
    bossAirApi = BossAirAPI(config=cfg)

    testWorkload = self.createTestWorkload()
    specName = "basicWorkload"
    stateChanger = ChangeState(cfg)

    subCount, jobsPerSub = 5, 10
    # computed exactly as before, although nothing below reads it
    unusedCacheDir = os.path.join(self.testDir, 'CacheDir')

    rerecoTask = testWorkload.getTask("ReReco")
    specPath = os.path.join(self.testDir, 'workloadTest', specName)
    groups = self.createJobGroups(nSubs=subCount,
                                  nJobs=jobsPerSub,
                                  task=rerecoTask,
                                  workloadSpec=specPath,
                                  site=None)
    for jobGroup in groups:
        stateChanger.propagate(jobGroup.jobs, 'created', 'new')

    submitter = JobSubmitterPoller(config=cfg)
    submitter.algorithm()

    submittedCount = getCondorRunningJobs(self.user)
    self.assertEqual(submittedCount, subCount * jobsPerSub)

    bossAirApi.track()
    bossAirApi.kill(jobs=bossAirApi._loadByStatus(status='Idle'))

    del submitter
    return
示例5: testT_updateJobInfo
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def testT_updateJobInfo(self):
    """
    _updateJobInfo_

    Test the updateSiteInformation method from CondorPlugin.py

    Jobs bound for site "se.T2_US_UCSD" are submitted through
    JobSubmitterPoller, the idle ones are passed to
    BossAirAPI.updateSiteInformation() and, when that call returns
    something other than None, the returned jobs are killed with
    error code 71301.
    """
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'CondorPlugin'
    config.BossAir.submitWMSMode = True

    baAPI = BossAirAPI(config=config)
    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 1
    nJobs = 2
    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(self.testDir,
                                                                  'workloadTest',
                                                                  workloadName),
                                        site="se.T2_US_UCSD")
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobSubmitter.algorithm()

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    baAPI.track()
    idleJobs = baAPI._loadByStatus(status='Idle')

    ##
    # Make one of the sites in the sitelist to be True for ABORTED/DRAINING/DOWN
    # updateSiteInformation() method should edit the classAd for all the jobs
    # that are bound for the site
    # Check the Q manually using condor_q -l <job id>
    #
    jtok = baAPI.updateSiteInformation(idleJobs, "T2_US_UCSD", True)
    # identity check: only a missing result (None) skips the kill
    if jtok is not None:
        baAPI.kill(jtok, errorCode=71301)  # errorCode can be either 71301/71302/71303 (Aborted/Draining/Down)
    return
示例6: changeSiteState
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def changeSiteState(self, siteName, state):
    """
    _changeSiteState_

    Store the new ``state`` for ``siteName``. Only when the state is
    "Aborted" and a configuration is available are the jobs executing at
    that site looked up and killed through BossAirAPI with error
    code 61301.
    """
    stateSetter = self.wmbsDAOFactory(classname="Locations.SetState")
    stateSetter.execute(siteName=siteName,
                        state=state,
                        conn=self.getDBConn(),
                        transaction=self.existingTransaction())

    if state != "Aborted" or not self.config:
        return

    # Everything executing at this site has to leave the batch system
    jobLister = self.wmbsDAOFactory(classname="Jobs.ListByStateAndLocation")
    siteJobIds = jobLister.execute(state='executing', location=siteName)
    batchApi = BossAirAPI(self.config, noSetup=True)
    batchApi.kill(siteJobIds, errorCode=61301)
    return
示例7: testC_CondorTest
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def testC_CondorTest(self):
    """
    _CondorTest_

    This test works on the CondorPlugin, checking all of
    its functions with a single set of jobs

    Visible steps: submit dummy jobs, check they move from 'New' to 'Idle'
    after track(), kill them, then resubmit with retry_count set to 1.
    The rest of the test is omitted in this excerpt.
    """
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    # Get the config and set the removal time to -10 for testing
    config = self.getConfig()
    config.BossAir.removeTime = -10.0

    nJobs = 10
    jobDummies = self.createDummyJobs(nJobs = nJobs)
    baAPI = BossAirAPI(config = config)

    # Function-call form prints the same text on Python 2 and Python 3
    print(self.testDir)

    # Placeholder files; 'with' closes the handles even if write() fails
    jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
    with open(jobPackage, 'w') as packageFile:
        packageFile.write(' ')

    sandbox = os.path.join(self.testDir, 'sandbox.box')
    with open(sandbox, 'w') as sandboxFile:
        sandboxFile.write(' ')

    jobList = []
    for j in jobDummies:
        tmpJob = {'id': j['id']}
        tmpJob['custom'] = {'location': 'malpaquet'}
        tmpJob['name'] = j['name']
        tmpJob['cache_dir'] = self.testDir
        tmpJob['retry_count'] = 0
        tmpJob['plugin'] = 'CondorPlugin'
        tmpJob['owner'] = 'tapas'
        tmpJob['packageDir'] = self.testDir
        tmpJob['sandbox'] = sandbox
        tmpJob['priority'] = None
        tmpJob['usergroup'] = "wheel"
        tmpJob['userrole'] = 'cmsuser'
        jobList.append(tmpJob)

    info = {}
    #info['packageDir'] = self.testDir
    info['index'] = 0
    info['sandbox'] = sandbox

    baAPI.submit(jobs = jobList, info = info)

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), nJobs)

    baAPI.track()

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status = 'Idle')
    self.assertEqual(len(newJobs), nJobs)

    # Do a second time to make sure that the cache
    # doesn't die on us
    baAPI.track()

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status = 'Idle')
    self.assertEqual(len(newJobs), nJobs)

    baAPI.kill(jobs = jobList)

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0)

    # Try resubmission
    for j in jobList:
        j['retry_count'] = 1

    baAPI.submit(jobs = jobList, info = info)

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), nJobs)

    # See where they are
    #......... part of the code is omitted here .........
示例8: StatusPoller
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
class StatusPoller(BaseWorkerThread):
"""
_StatusPoller_
Worker thread that asks BossAir to track jobs and marks the ones
that exceeded the timeout configured for their state.
"""
def __init__(self, config):
"""
__init__
Store the configuration, create the BossAirAPI handle and read
the per-state timeouts from config.JobStatusLite.stateTimeouts.
"""
self.config = config
BaseWorkerThread.__init__(self)
self.cachedJobs = []
self.bossAir = BossAirAPI(config=config)
# With no timeouts, nothing ever happens
# Otherwise we expect a dictionary with the keys representing
# the states and the values the timeouts.
# NOTE(review): getattr is called without a default, so a missing
# 'stateTimeouts' presumably raises AttributeError -- confirm intended.
self.timeouts = getattr(config.JobStatusLite, 'stateTimeouts')
return
@timeFunction
def algorithm(self, parameters=None):
"""
_algorithm_
Run checkStatus() and handle any exception it raises.
A WMException is re-raised as is; any other exception is logged and
replaced by a StatusPollerException. In both cases the thread's
transaction, if it has one, is rolled back first.
"""
myThread = threading.currentThread()
try:
logging.info("Running job status poller algorithm...")
self.checkStatus()
except WMException as ex:
if getattr(myThread, 'transaction', None):
myThread.transaction.rollbackForError()
raise
except Exception as ex:
msg = "Unhandled error in statusPoller"
msg += str(ex)
logging.exception(msg)
if getattr(myThread, 'transaction', None):
myThread.transaction.rollbackForError()
raise StatusPollerException(msg)
return
def checkStatus(self):
"""
_checkStatus_
Run the BossAir track() function (self-contained)
and then check for jobs that have timed out.
Timed-out jobs get status 'Timeout' and are grouped by the global
state they were in; the rest of the method is omitted in this excerpt.
"""
runningJobs = self.bossAir.track()
if len(runningJobs) < 1:
# Then we have no jobs
return
if not self.timeouts:
# Then we've set ourselves to have no timeouts
# Get out and stay out
return
# Look for jobs that need to be killed
# (keyed by the global state the job was in when it timed out)
jobsToKill = defaultdict(list)
# Now check for timeouts
for job in runningJobs:
globalState = job.get('globalState', 'Error')
statusTime = job.get('status_time', None)
timeout = self.timeouts.get(globalState, None)
if statusTime == 0:
logging.error("Not killing job %i, the status time was zero", job['id'])
continue
# A falsy timeout or status time means the job is never timed out
if timeout and statusTime:
if time.time() - float(statusTime) > float(timeout):
# Timeout status is used by JobTracker to fail jobs in WMBS database
logging.info("Killing job %i because it has exceeded timeout for status '%s'", job['id'], globalState)
job['status'] = 'Timeout'
jobsToKill[globalState].append(job)
# Error code looked up per previous global state
timeOutCodeMap = {"Running": 71304, "Pending": 71305, "Error": 71306}
# We need to show that the jobs are in state timeout
# and then kill them.
jobsToKillList = flattenList(jobsToKill.values())
myThread = threading.currentThread()
myThread.transaction.begin()
self.bossAir.update(jobs=jobsToKillList)
for preJobStatus in jobsToKill:
eCode = timeOutCodeMap.get(preJobStatus, 71307) # it shouldn't have 71307 (states should be among Running, Pending, Error)
#......... part of the code is omitted here .........
示例9: testH_ARCTest
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def testH_ARCTest(self):
    """
    _ARCTest_

    This test works on the ARCPlugin, checking all of
    its functions with a single set of jobs

    Visible steps: submit dummy jobs, check that none is left in 'New'
    after track(), kill them, resubmit with retry_count set to 1 and track
    again. The rest of the test is omitted in this excerpt.
    """
    nRunning = getNArcJobs()
    self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginNames.append("ARCPlugin")
    #config.BossAir.pluginNames = ["ARCPlugin"]

    baAPI = BossAirAPI(config = config)

    nJobs = 2
    jobDummies = self.createDummyJobs(nJobs = nJobs, location = 'jade-cms.hip.fi')
    #baAPI.createNewJobs(wmbsJobs = jobDummies)
    #changeState = ChangeState(config)
    #changeState.propagate(jobDummies, 'created', 'new')
    #changeState.propagate(jobDummies, 'executing', 'created')

    # Placeholder files; 'with' closes the handles even if write() fails
    jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
    with open(jobPackage, 'w') as packageFile:
        packageFile.write(' ')

    sandbox = os.path.join(self.testDir, 'sandbox.box')
    with open(sandbox, 'w') as sandboxFile:
        sandboxFile.write(' ')

    jobList = []
    for j in jobDummies:
        job = j # {'id': j['id']}
        job['custom'] = {'location': 'jade-cms.hip.fi'}
        job['location'] = 'jade-cms.hip.fi'
        job['plugin'] = 'ARCPlugin'
        job['name'] = j['name']
        job['cache_dir'] = self.testDir
        job['retry_count'] = 0
        job['owner'] = 'edelmann'
        job['packageDir'] = self.testDir
        job['sandbox'] = sandbox
        job['priority'] = None
        jobList.append(job)

    baAPI.submit(jobs = jobList)

    nRunning = getNArcJobs()
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), nJobs)

    baAPI.track()

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), 0)

    # Every job known to BossAir should have left the 'New' status
    rJobs = baAPI._listRunJobs()
    nOldJobs = 0
    for j in rJobs:
        if j['status'] != "New":
            nOldJobs += 1
    self.assertEqual(nOldJobs, nJobs)

    #if baAPI.plugins['ARCPlugin'].stateDict[j['status']] in [ "Pending", "Running" ]:
    baAPI.kill(jobs = jobList)

    nRunning = getNArcJobs()
    self.assertEqual(nRunning, 0)

    # Try resubmission
    for j in jobList:
        j['retry_count'] = 1

    succ, fail = baAPI.submit(jobs = jobList)

    time.sleep(30)

    nRunning = getNArcJobs()
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), nJobs)

    # See where they are
    baAPI.track()

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), 0)

    rJobs = baAPI._listRunJobs()
    nOldJobs = 0
    idStr = ""
    for j in rJobs:
        idStr += " " + j['gridid']
        #......... part of the code is omitted here .........
示例10: StatusPoller
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
class StatusPoller(BaseWorkerThread):
    """
    _StatusPoller_

    Worker thread that asks BossAir to track jobs and kills the ones that
    exceeded the timeout configured for their state.
    """

    def __init__(self, config):
        """
        __init__

        Set up the caching and other objects: the BossAirAPI handle, the
        per-state timeouts (an empty dict when
        config.JobStatusLite.stateTimeouts is not set) and the alert system.
        """
        self.config = config
        BaseWorkerThread.__init__(self)

        self.cachedJobs = []

        self.bossAir = BossAirAPI(config=config)

        # With no timeouts, nothing ever happens
        # Otherwise we expect a dictionary with the keys representing
        # the states and the values the timeouts.
        self.timeouts = getattr(config.JobStatusLite, 'stateTimeouts', {})

        # init alert system
        self.initAlerts(compName="StatusPoller")
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Run checkStatus() and handle any exception it raises.

        A WMException is re-raised as is; any other exception is logged
        and replaced by a StatusPollerException. In both cases an alert
        is sent and the thread's transaction, if any, is rolled back.
        """
        myThread = threading.currentThread()
        try:
            self.checkStatus()
        except WMException as ex:
            if getattr(myThread, 'transaction', None):
                myThread.transaction.rollbackForError()
            self.sendAlert(6, msg=str(ex))
            raise
        except Exception as ex:
            msg = "Unhandled error in statusPoller"
            msg += str(ex)
            logging.exception(msg)
            self.sendAlert(6, msg=msg)
            if getattr(myThread, 'transaction', None):
                myThread.transaction.rollbackForError()
            raise StatusPollerException(msg)

        return

    def checkStatus(self):
        """
        _checkStatus_

        Run the BossAir track() function (self-contained)
        and then check for jobs that have timed out.

        Timed-out jobs get status 'Timeout', are updated in BossAir and
        killed with error code 61304 inside one transaction.
        """
        runningJobs = self.bossAir.track()

        if len(runningJobs) < 1:
            # Then we have no jobs
            return

        if not self.timeouts:
            # Then we've set ourselves to have no timeouts
            # Get out and stay out
            return

        # Look for jobs that need to be killed
        jobsToKill = []

        # Now check for timeouts
        for job in runningJobs:
            globalState = job.get('globalState', 'Error')
            statusTime = job.get('status_time', None)
            timeout = self.timeouts.get(globalState, None)
            if statusTime == 0:
                # lazy %-args: the message is only built if it is emitted
                logging.error("Not killing job %i, the status time was zero", job['id'])
                continue
            if timeout is not None and statusTime is not None:
                if time.time() - float(statusTime) > float(timeout):
                    # Then the job needs to be killed.
                    logging.info("Killing job %i because it has exceeded timeout for status %s",
                                 job['id'], globalState)
                    job['status'] = 'Timeout'
                    jobsToKill.append(job)

        # We need to show that the jobs are in state timeout
        # and then kill them.
        myThread = threading.currentThread()
        myThread.transaction.begin()
        self.bossAir.update(jobs=jobsToKill)
        self.bossAir.kill(jobs=jobsToKill, killMsg=WM_JOB_ERROR_CODES[61304], errorCode=61304)
        myThread.transaction.commit()
        #......... part of the code is omitted here .........
示例11: killWorkflow
# 需要导入模块: from WMCore.BossAir.BossAirAPI import BossAirAPI [as 别名]
# 或者: from WMCore.BossAir.BossAirAPI.BossAirAPI import kill [as 别名]
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already running inside the agent.

    The workflow's files and jobs are killed through the
    "Subscriptions.KillWorkflow" and "Jobs.KillWorkflow" DAOs. When
    ``bossAirConfig`` is given, jobs in state 'executing' are also killed
    through BossAirAPI. Every returned job not already in state "killed"
    is then moved to "killed" via ChangeState built from
    ``jobCouchConfig``.
    """
    thread = threading.currentThread()
    wmbsFactory = DAOFactory(package="WMCore.WMBS",
                             logger=thread.logger,
                             dbinterface=thread.dbi)
    fileKiller = wmbsFactory(classname="Subscriptions.KillWorkflow")
    jobKiller = wmbsFactory(classname="Jobs.KillWorkflow")

    fileKiller.execute(workflowName=workflowName,
                       conn=thread.transaction.conn)
    liveJobs = jobKiller.execute(workflowName=workflowName,
                                 conn=thread.transaction.conn)

    stateChanger = ChangeState(jobCouchConfig)

    # Jobs sitting in the batch system can only be reached through the API
    if bossAirConfig:
        batchApi = BossAirAPI(config=bossAirConfig, noSetup=True)
        batchJobs = []
        for candidate in liveJobs:
            if candidate["state"].lower() != 'executing':
                continue
            # This one has to be removed from the batch system
            wmbsJob = Job(id=candidate["id"])
            wmbsJob.update(candidate)
            batchJobs.append(candidate)

        try:
            logging.info("Killing %d jobs for workflow: %s", len(batchJobs), workflowName)
            batchApi.kill(jobs=batchJobs, workflowName=workflowName)
        except BossAirException as ex:
            # The batch kill failed; report it and carry on, the jobs
            # still have to be killed in the master and the batch system
            # will have to take care of itself.
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            logging.error(getattr(ex, 'traceback', ''))

    # Group the not-yet-killed jobs by the state they are currently in
    jobsByOldState = defaultdict(list)
    for candidate in liveJobs:
        if candidate["state"] != "killed":
            wmbsJob = Job(id=candidate["id"])
            wmbsJob.update(candidate)
            jobsByOldState[candidate["state"]].append(wmbsJob)

    for oldState, jobs in jobsByOldState.items():
        tooManyForCouch = len(jobs) > 100 and oldState != "executing"
        if not tooManyForCouch:
            stateChanger.propagate(jobs, "killed", oldState)
            continue
        # if there are too many jobs skip the couch and dashboard update
        # TODO: couch and dashboard need to be updated or parallel.
        stateChanger.check("killed", oldState)
        stateChanger.persist(jobs, "killed", oldState)
    return