本文整理汇总了 Python 中 pymesos.MesosSchedulerDriver.stop 方法的典型用法代码示例。如果您正苦于以下问题：Python MesosSchedulerDriver.stop 方法的具体用法？Python MesosSchedulerDriver.stop 怎么用？Python MesosSchedulerDriver.stop 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pymesos.MesosSchedulerDriver 的用法示例。
在下文中一共展示了MesosSchedulerDriver.stop方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ProcScheduler
# 需要导入模块: from pymesos import MesosSchedulerDriver [as 别名]
# 或者: from pymesos.MesosSchedulerDriver import stop [as 别名]
#.........这里部分代码省略.........
if slave_id is not None:
if slave_id in self.slave_to_proc:
self.slave_to_proc[slave_id].remove(proc_id)
else:
for slave_id, procs in self.slave_to_proc.iteritems():
if proc_id in procs:
procs.remove(proc_id)
proc._finished(success, message, data)
def statusUpdate(self, driver, update):
    """Handle a task status update delivered by the driver.

    The task id is parsed as the integer proc id.  On ``TASK_RUNNING`` the
    proc is recorded under its slave in ``self.slave_to_proc`` and the proc's
    ``_started()`` hook is invoked.  On any state numerically at or above
    ``TASK_FINISHED`` the proc is reported through ``self._call_finished``
    and offers are revived.

    :param driver: scheduler driver; ``reviveOffers()`` is called on it.
    :param update: status message exposing ``task_id.value``, ``state``,
        ``slave_id.value``, ``message`` and ``data``.
    :raises KeyError: if a running proc id is not in ``self.procs_launched``.
    """
    with self._lock:
        proc_id = int(update.task_id.value)
        # Let the logging module do the formatting (lazy %-args).
        logger.info("Status update for proc, id=%s, state=%s",
                    proc_id, update.state)

        if update.state == mesos_pb2.TASK_RUNNING:
            slave_key = update.slave_id.value
            # setdefault replaces the "key present? add : create" branch.
            self.slave_to_proc.setdefault(slave_key, set()).add(proc_id)
            self.procs_launched[proc_id]._started()

        # NOTE(review): this relies on the numeric ordering of the task-state
        # enum; confirm no non-terminal state is numbered above TASK_FINISHED.
        elif update.state >= mesos_pb2.TASK_FINISHED:
            slave_id = update.slave_id.value
            success = update.state == mesos_pb2.TASK_FINISHED
            message = update.message
            # NOTE(review): pickle.loads executes arbitrary code if the
            # payload is attacker-controlled; only safe if executors are
            # trusted.
            data = update.data and pickle.loads(update.data)
            self._call_finished(proc_id, success, message, data, slave_id)
            driver.reviveOffers()
def offerRescinded(self, driver, offer_id):
    """Ask for fresh offers when an offer is rescinded and work is pending.

    :param driver: scheduler driver; ``reviveOffers()`` is called on it.
    :param offer_id: id of the rescinded offer (not used here).
    """
    with self._lock:
        if not self.procs_pending:
            # Nothing is waiting for resources, so there is nothing to revive.
            return
        logger.info("Revive offers for pending procs")
        driver.reviveOffers()
def slaveLost(self, driver, slave_id):
    """Fail every proc that was recorded on a slave that has been lost.

    The slave's entry is removed from ``self.slave_to_proc`` and each proc in
    it is reported via ``self._call_finished`` with ``success=False`` and the
    message ``"Slave lost"``.

    NOTE(review): other methods key ``slave_to_proc`` by
    ``update.slave_id.value``; confirm ``slave_id`` here is the same kind of
    key.

    :param driver: scheduler driver (not used here).
    :param slave_id: key of the lost slave in ``self.slave_to_proc``.
    """
    with self._lock:
        orphaned = self.slave_to_proc.pop(slave_id, [])
        for proc_id in orphaned:
            self._call_finished(proc_id, False, "Slave lost", None, slave_id)
def error(self, driver, message):
    """Fail all pending and launched procs, then stop the scheduler.

    Every proc is reported through ``self._call_finished`` with
    ``success=False`` and the message ``"Stopped"``; afterwards
    ``self.stop()`` is called outside the lock.

    :param driver: scheduler driver (not used here).
    :param message: error text from the driver (not used here).
    """
    with self._lock:
        for table in (self.procs_pending, self.procs_launched):
            # Snapshot the values: the callback may remove entries from the
            # table, which would break iteration over a live dict view.
            for proc in list(table.values()):
                self._call_finished(proc.id, False, "Stopped", None)

    self.stop()
def start(self):
    """Start the scheduler driver held in ``self.driver``."""
    driver = self.driver
    driver.start()
def stop(self):
    """Stop the scheduler driver held in ``self.driver``.

    :raises RuntimeError: if the driver has already been aborted.
    """
    # An explicit check instead of ``assert``: asserts are stripped under
    # ``python -O``, and the sibling methods raise RuntimeError for the same
    # condition.
    if self.driver.aborted:
        raise RuntimeError("driver already aborted")
    self.driver.stop()
def submit(self, proc):
    """Queue a proc for launching.

    The proc is stored in ``self.procs_pending`` under ``proc.id``.  When it
    is the only pending proc, offers are revived on the driver.

    :param proc: object exposing an ``id`` attribute.
    :raises RuntimeError: if the driver has already been aborted.
    :raises ValueError: if a proc with the same id is already pending.
    """
    if self.driver.aborted:
        raise RuntimeError("driver already aborted")

    with self._lock:
        if proc.id in self.procs_pending:
            raise ValueError("Proc with same id already submitted")

        # Pass the id itself; wrapping it in a tuple made the log read
        # "id=(42,)" instead of "id=42".
        logger.info("Try submit proc, id=%s", proc.id)
        self.procs_pending[proc.id] = proc
        if len(self.procs_pending) == 1:
            logger.info("Revive offers for pending procs")
            self.driver.reviveOffers()
def cancel(self, proc):
    """Cancel a pending or launched proc.

    A pending proc is simply dropped.  A launched proc is dropped and its
    task is killed through the driver.  The proc id is then removed from
    every slave's proc set, and slave entries left empty are deleted.

    :param proc: object exposing an ``id`` attribute.
    :raises RuntimeError: if the driver has already been aborted.
    """
    if self.driver.aborted:
        raise RuntimeError("driver already aborted")

    with self._lock:
        if proc.id in self.procs_pending:
            del self.procs_pending[proc.id]
        elif proc.id in self.procs_launched:
            del self.procs_launched[proc.id]
            self.driver.killTask(mesos_pb2.TaskID(value=str(proc.id)))

        # Iterate over a snapshot because entries are deleted in the loop.
        for slave_id, procs in list(self.slave_to_proc.items()):
            # The values are sets: ``set.pop`` takes no argument, so the
            # original ``procs.pop(proc.id)`` raised TypeError.  ``discard``
            # removes the id if present and is a no-op otherwise.
            procs.discard(proc.id)
            if not procs:
                del self.slave_to_proc[slave_id]
def send_data(self, pid, type, data):
    """Send a pickled ``(pid, type, data)`` message to the proc's slave.

    The slave is found by searching ``self.slave_to_proc`` for a proc set
    containing ``pid``; the message goes out as a framework message addressed
    to ``self.executor.executor_id`` on that slave.

    :param pid: proc id to locate.
    :param type: message type tag, pickled as given (the name shadows the
        builtin but is part of the public signature).
    :param data: payload, pickled as given.
    :raises RuntimeError: if the driver has been aborted, or if no slave is
        recorded for ``pid``.
    """
    if self.driver.aborted:
        raise RuntimeError("driver already aborted")

    msg = pickle.dumps((pid, type, data))
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` is Python 2
    # only.
    for slave_id, procs in self.slave_to_proc.items():
        if pid in procs:
            self.driver.sendFrameworkMessage(
                self.executor.executor_id,
                mesos_pb2.SlaveID(value=slave_id),
                msg,
            )
            return

    raise RuntimeError("Cannot find slave for pid %s" % (pid,))
示例2: TFMesosScheduler
# 需要导入模块: from pymesos import MesosSchedulerDriver [as 别名]
# 或者: from pymesos.MesosSchedulerDriver import stop [as 别名]
#.........这里部分代码省略.........
framework.hostname = socket.gethostname()
framework.role = self.role
self.driver = MesosSchedulerDriver(
self, framework, self.master, use_addict=True
)
self.driver.start()
task_start_count = 0
while any((not task.initalized
for id, task in iteritems(self.tasks))):
if readable(lfd):
c, _ = lfd.accept()
if readable(c):
mesos_task_id, addr = recv(c)
task = self.tasks[mesos_task_id]
task.addr = addr
task.connection = c
task.initalized = True
task_start_count += 1
logger.info('Task %s with mesos_task_id %s has '
'registered',
'{}:{}'.format(task.job_name,
task.task_index),
mesos_task_id)
logger.info('Out of %d tasks '
'%d tasks have been registered',
len(self.tasks), task_start_count)
else:
c.close()
self.started = True
self._start_tf_cluster()
except Exception:
self.stop()
raise
finally:
lfd.close()
def registered(self, driver, framework_id, master_info):
    """Log the registration and choose a default containerizer type.

    When ``self.containerizer_type`` is still ``None`` it becomes ``'MESOS'``
    if the dotted ``driver.version`` is at least 1.0.0, else ``'DOCKER'``.

    :param driver: scheduler driver; only ``driver.version`` is read.
    :param framework_id: object exposing ``value``.
    :param master_info: object exposing ``hostname`` and ``port``.
    """
    logger.info(
        'Tensorflow cluster registered. '
        '( http://%s:%s/#/frameworks/%s )',
        master_info.hostname, master_info.port, framework_id.value
    )
    if self.containerizer_type is not None:
        # Explicitly configured: leave it alone.
        return

    parsed = tuple(map(int, driver.version.split(".")))
    if parsed >= (1, 0, 0):
        self.containerizer_type = 'MESOS'
    else:
        self.containerizer_type = 'DOCKER'
def statusUpdate(self, driver, update):
logger.debug('Received status update %s', str(update.state))
mesos_task_id = update.task_id.value
if self._is_terminal_state(update.state):
task = self.tasks.get(mesos_task_id)
if task is None:
# This should be very rare and hence making this info.
logger.info("Task not found for mesos task id {}"
.format(mesos_task_id))
return
if self.started:
if update.state != 'TASK_FINISHED':
logger.error('Task failed: %s, %s with state %s', task,
update.message, update.state)
raise RuntimeError(
示例3: TFMesosScheduler
# 需要导入模块: from pymesos import MesosSchedulerDriver [as 别名]
# 或者: from pymesos.MesosSchedulerDriver import stop [as 别名]
#.........这里部分代码省略.........
'Device /job:%s/task:%s activated @ grpc://%s ',
task.job_name,
task.task_index,
task.addr
)
task.connection.close()
return targets
def start(self):
    """Start the driver, wait for every task to call back, start the cluster.

    A listening socket is bound to an ephemeral port and its address is
    stored in ``self.addr``.  The driver is then started and the method polls
    the socket until every task in ``self.tasks`` is flagged ``initalized``;
    each incoming connection is expected to deliver a
    ``(mesos_task_id, addr)`` pair via ``recv``.  On any exception
    ``self.stop()`` is called and the exception re-raised; the listening
    socket is always closed.

    :returns: whatever ``self._start_tf_cluster()`` returns.
    """
    def readable(fd):
        # Poll for up to 0.1 s so the loop re-checks its condition regularly.
        ready, _, _ = select.select([fd], [], [], 0.1)
        return bool(ready)

    lfd = socket.socket()
    try:
        lfd.bind(('', 0))
        host = socket.gethostname()
        port = lfd.getsockname()[1]
        self.addr = '%s:%s' % (host, port)
        lfd.listen(10)

        framework = Dict()
        framework.user = getpass.getuser()
        framework.name = self.name
        framework.hostname = socket.gethostname()

        self.driver = MesosSchedulerDriver(
            self, framework, self.master, use_addict=True
        )
        self.driver.start()

        while not all(task.initalized for task in self.tasks):
            if not readable(lfd):
                continue
            conn, _ = lfd.accept()
            if not readable(conn):
                # The peer connected but sent nothing in time; drop it.
                conn.close()
                continue
            mesos_task_id, addr = recv(conn)
            assert isinstance(mesos_task_id, int)
            task = self.tasks[mesos_task_id]
            task.addr = addr
            task.connection = conn
            task.initalized = True

        self.started = True
        return self._start_tf_cluster()
    except Exception:
        self.stop()
        raise
    finally:
        lfd.close()
def registered(self, driver, framework_id, master_info):
    """Log the master web UI URL of the newly registered framework.

    :param driver: scheduler driver (not used here).
    :param framework_id: object exposing ``value``.
    :param master_info: object exposing ``hostname`` and ``port``.
    """
    template = (
        'Tensorflow cluster registered. '
        '( http://%s:%s/#/frameworks/%s )'
    )
    host = master_info.hostname
    port = master_info.port
    logger.info(template, host, port, framework_id.value)
def statusUpdate(self, driver, update):
    """React to a task status update.

    ``TASK_RUNNING`` updates are ignored.  For any other state:

    * once the cluster is started, anything but ``TASK_FINISHED`` is logged
      as an error and raised as ``RuntimeError``;
    * before the cluster is started, the task's connection (if any) is
      closed and offers are revived.

    :param driver: scheduler driver; ``reviveOffers()`` may be called on it.
    :param update: status exposing ``task_id.value``, ``state``, ``message``.
    :raises RuntimeError: when a task fails after the cluster has started.
    """
    mesos_task_id = int(update.task_id.value)
    if update.state == 'TASK_RUNNING':
        return

    task = self.tasks[mesos_task_id]
    if self.started:
        if update.state != 'TASK_FINISHED':
            logger.error('Task failed: %s, %s', task, update.message)
            # Report the actual task id; the original formatted the builtin
            # ``id`` function into the message.
            raise RuntimeError(
                'Task %s failed! %s' % (mesos_task_id, update.message)
            )
    else:
        # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
        logger.warning('Task failed: %s, %s', task, update.message)
        if task.connection:
            task.connection.close()

        driver.reviveOffers()
def slaveLost(self, driver, agent_id):
    """Raise if an agent is lost after the cluster has started.

    :param driver: scheduler driver (not used here).
    :param agent_id: object exposing ``value``.
    :raises RuntimeError: when ``self.started`` is truthy.
    """
    if not self.started:
        return
    logger.error('Slave %s lost:', agent_id.value)
    failure = RuntimeError('Slave %s lost' % agent_id)
    raise failure
def executorLost(self, driver, executor_id, agent_id, status):
    """Raise if an executor is lost after the cluster has started.

    :param driver: scheduler driver (not used here).
    :param executor_id: object exposing ``value``.
    :param agent_id: id of the agent the executor ran on.
    :param status: exit status reported for the executor (not used here).
    :raises RuntimeError: when ``self.started`` is truthy.
    """
    if self.started:
        logger.error('Executor %s lost:', executor_id.value)
        # The format string had been mangled to '%[email protected]%s' by
        # e-mail obfuscation, which raises ValueError when formatted; the
        # intended form is "<executor>@<agent>".
        raise RuntimeError('Executor %s@%s lost' % (executor_id, agent_id))
def error(self, driver, message):
    """Log an error reported by the driver and raise it as ``RuntimeError``.

    :param driver: scheduler driver (not used here).
    :param message: error text; concatenated into the exception message.
    :raises RuntimeError: always.
    """
    logger.error('Mesos error: %s', message)
    failure = RuntimeError('Error ' + message)
    raise failure
def stop(self):
    """Close task connections and stop the driver, if they exist.

    Both ``self.tasks`` and ``self.driver`` are deleted from the instance
    after being handled, so a second call finds nothing to do.
    """
    logger.debug('exit')

    if hasattr(self, 'tasks'):
        for task in getattr(self, 'tasks', []):
            conn = task.connection
            if conn:
                conn.close()
        del self.tasks

    if hasattr(self, 'driver'):
        driver = self.driver
        driver.stop()
        del self.driver
示例4: spawn_rconsole
# 需要导入模块: from pymesos import MesosSchedulerDriver [as 别名]
# 或者: from pymesos.MesosSchedulerDriver import stop [as 别名]
# Excerpt of a driver run loop; `handler`, `driver`, `sched`, `ctx` and the
# EXIT_* constants are defined outside this excerpt.
# NOTE(review): indentation was lost in this listing.  The statements after
# `finally:` presumably all belong to the finally block -- confirm against the
# original file before re-indenting.

# Install `handler` for SIGABRT and SIGQUIT.
signal.signal(signal.SIGABRT, handler)
signal.signal(signal.SIGQUIT, handler)
# Pass the current local namespace to spawn_rconsole.
spawn_rconsole(locals())
try:
driver.start()
sched.run(driver)
except KeyboardInterrupt:
logger.warning('stopped by KeyboardInterrupt')
sched.stop(EXIT_KEYBORAD)
except Exception as e:
import traceback
logger.warning('catch unexpected Exception, exit now. %s',
traceback.format_exc())
sched.stop(EXIT_EXCEPTION)
finally:
try:
sched.dump_stats()
except:
# Dumping stats is best-effort: the failure is logged and ignored.
logger.exception("dump stats fail, ignore it.")
# sched.lock may be in a WRONG state at this point.
# If any thread of sched may use the lock or call the driver, join it first.
driver.stop(False)
driver.join()
# Mesos resources are released, and nothing races for the lock any more.
sched.cleanup()
ctx.term()
# Exit with the code recorded on the scheduler.
sys.exit(sched.ec)
示例5: MesosScheduler
# 需要导入模块: from pymesos import MesosSchedulerDriver [as 别名]
# 或者: from pymesos.MesosSchedulerDriver import stop [as 别名]
class MesosScheduler(DAGScheduler):
def __init__(self, master, options):
    """Initialise scheduler state from the master address and options.

    :param master: master address, later passed to the scheduler driver.
    :param options: option object; ``self``, ``cpus``, ``mem``, ``parallel``,
        ``group`` and ``logLevel`` are read here, and the whole object is
        kept as ``self.options``.
    """
    DAGScheduler.__init__(self)

    # Settings taken from the caller.
    self.master = master
    self.options = options
    self.use_self_as_exec = options.self
    self.cpus = options.cpus
    self.mem = options.mem
    self.group = options.group
    self.logLevel = options.logLevel
    # A falsy ``options.parallel`` falls back to the local CPU count.
    self.task_per_node = options.parallel or multiprocessing.cpu_count()

    # Runtime state, filled in once the driver is started and registered.
    self.started = False
    self.isRegistered = False
    self.last_finish_time = 0
    self.executor = None
    self.driver = None
    self.out_logger = None
    self.err_logger = None
    self.lock = threading.RLock()

    self.init_job()
def init_job(self):
    """Reset the job and task bookkeeping containers to empty."""
    self.activeJobsQueue = []
    # Each mapping gets its own fresh dict.
    for attr in ('activeJobs', 'taskIdToJobId', 'taskIdToAgentId',
                 'jobTasks', 'agentTasks'):
        setattr(self, attr, {})
def clear(self):
    """Run the base-class ``clear`` and then reset the job bookkeeping."""
    base_clear = DAGScheduler.clear
    base_clear(self)
    self.init_job()
def start(self):
    """Start the stdout and stderr log collectors that are not yet set.

    ``self.out_logger`` / ``self.err_logger`` receive whatever
    ``self.start_logger`` returns for ``sys.stdout`` / ``sys.stderr``.
    """
    for attr, stream in (('out_logger', sys.stdout),
                         ('err_logger', sys.stderr)):
        if not getattr(self, attr):
            setattr(self, attr, self.start_logger(stream))
def start_driver(self):
    """Create and start the scheduler driver plus an idle watchdog.

    The framework name is built from the command line (capped at 256
    characters plus ``'...'``).  After the driver is started, a background
    ``check`` loop is spawned that calls ``self.stop()`` once there are no
    active jobs and more than ``MAX_IDLE_TIME`` seconds have passed since
    ``self.last_finish_time``.

    :raises Exception: if the current user is ``root``.
    """
    argv = sys.argv
    name = ' '.join(['[dpark]', os.path.abspath(argv[0]), ' '.join(argv[1:])])
    if len(name) > 256:
        name = name[:256] + '...'

    framework = Dict()
    framework.user = getuser()
    if framework.user == 'root':
        raise Exception('dpark is not allowed to run as \'root\'')
    framework.name = name
    framework.hostname = socket.gethostname()
    framework.webui_url = self.options.webui_url

    self.driver = MesosSchedulerDriver(
        self, framework, self.master, use_addict=True
    )
    self.driver.start()
    logger.debug('Mesos Scheudler driver started')

    self.started = True
    self.last_finish_time = time.time()

    def check():
        # Poll once a second until the scheduler is stopped or idles out.
        while self.started:
            now = time.time()
            if not self.activeJobs:
                if now - self.last_finish_time > MAX_IDLE_TIME:
                    logger.info('stop mesos scheduler after %d seconds idle',
                                now - self.last_finish_time)
                    self.stop()
                    break
            time.sleep(1)

    spawn(check)
def start_logger(self, output):
    """Start a background collector that copies received lines to ``output``.

    A PULL socket is bound to a random TCP port on all interfaces, and a
    spawned loop writes every received message to ``output`` until
    ``self._shutdown`` becomes truthy.

    :param output: object with a ``write`` method.
    :returns: the ``tcp://host:port`` address of the collector.
    """
    pull_sock = env.ctx.socket(zmq.PULL)
    port = pull_sock.bind_to_random_port('tcp://0.0.0.0')

    def collect_log():
        # Poll with a 1000 ms timeout so the shutdown flag is re-checked.
        while not self._shutdown:
            if not pull_sock.poll(1000, zmq.POLLIN):
                continue
            output.write(pull_sock.recv())

    spawn(collect_log)

    addr = 'tcp://%s:%d' % (socket.gethostname(), port)
    logger.debug('log collecter start at %s', addr)
    return addr
@safe
def registered(self, driver, frameworkId, masterInfo):
    """Record the registration and build the executor info.

    :param driver: scheduler driver (not used here).
    :param frameworkId: object exposing ``value``.
    :param masterInfo: object exposing ``hostname`` and ``port``.
    """
    self.isRegistered = True
    master_host = masterInfo.hostname
    master_port = masterInfo.port
    logger.debug('connect to master %s:%s, registered as %s',
                 master_host, master_port, frameworkId.value)
    framework_key = str(frameworkId.value)
    self.executor = self.getExecutorInfo(framework_key)
@safe
#.........这里部分代码省略.........
示例6: TFMesosScheduler
# 需要导入模块: from pymesos import MesosSchedulerDriver [as 别名]
# 或者: from pymesos.MesosSchedulerDriver import stop [as 别名]
#.........这里部分代码省略.........
for task in self.tasks:
response = {
"job_name": task.job_name,
"task_index": task.task_index,
"cpus": task.cpus,
"mem": task.mem,
"cluster_def": cluster_def,
}
send(task.connection, response)
assert recv(task.connection) == "ok"
logger.info(
"Device /job:%s/task:%s activated @ grpc://%s " % (
task.job_name,
task.task_index,
task.addr
)
)
task.connection.close()
return targets
def start(self):
    """Start the driver, wait for every task to call back, start the cluster.

    A listening socket is bound to an ephemeral port and its address is
    stored in ``self.addr``.  The driver is then started and the method polls
    the socket until every task in ``self.tasks`` is flagged ``initalized``;
    each incoming connection is expected to deliver a
    ``(mesos_task_id, addr)`` pair via ``recv``.  On any exception
    ``self.stop()`` is called and the exception re-raised; the listening
    socket is always closed.

    :returns: whatever ``self._start_tf_cluster()`` returns.
    """
    def readable(fd):
        # Poll for up to 0.1 s so the loop re-checks its condition regularly.
        ready, _, _ = select.select([fd], [], [], 0.1)
        return bool(ready)

    lfd = socket.socket()
    try:
        lfd.bind(('', 0))
        host = socket.gethostname()
        port = lfd.getsockname()[1]
        self.addr = '%s:%s' % (host, port)
        lfd.listen(10)

        framework = mesos_pb2.FrameworkInfo()
        framework.user = getpass.getuser()
        framework.name = self.name
        framework.hostname = socket.gethostname()

        self.driver = MesosSchedulerDriver(self, framework, self.master)
        self.driver.start()

        while not all(task.initalized for task in self.tasks):
            if not readable(lfd):
                continue
            conn, _ = lfd.accept()
            if not readable(conn):
                # The peer connected but sent nothing in time; drop it.
                conn.close()
                continue
            mesos_task_id, addr = recv(conn)
            assert isinstance(mesos_task_id, int)
            task = self.tasks[mesos_task_id]
            task.addr = addr
            task.connection = conn
            task.initalized = True

        return self._start_tf_cluster()
    except Exception:
        self.stop()
        raise
    finally:
        lfd.close()
def registered(self, driver, framework_id, master_info):
    """Log the master web UI URL of the newly registered framework.

    :param driver: scheduler driver (not used here).
    :param framework_id: object exposing ``value``.
    :param master_info: object exposing ``hostname`` and ``port``.
    """
    # Hand the arguments to the logger instead of pre-formatting with ``%``,
    # so the string is only built when the record is actually emitted.
    logger.info(
        "Tensorflow cluster registered. "
        "( http://%s:%s/#/frameworks/%s )",
        master_info.hostname, master_info.port, framework_id.value
    )
def statusUpdate(self, driver, update):
    """React to a task status update.

    ``TASK_RUNNING`` updates are ignored.  For any other state:

    * once the cluster is started, the task is logged as failed and a
      ``RuntimeError`` is handed to ``_raise``;
    * before the cluster is started, the task's connection (if any) is
      closed and offers are revived.

    NOTE(review): every non-running state is treated as a failure once
    started, including a normal finish -- confirm that is intended.

    :param driver: scheduler driver; ``reviveOffers()`` may be called on it.
    :param update: status exposing ``task_id.value`` and ``state``.
    """
    mesos_task_id = int(update.task_id.value)
    if update.state == mesos_pb2.TASK_RUNNING:
        return

    task = self.tasks[mesos_task_id]
    if self.started:
        logger.error("Task failed: %s", task)
        # Report the actual task id; the original formatted the builtin
        # ``id`` function into the message.
        _raise(RuntimeError('Task %s failed!' % mesos_task_id))
    else:
        # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
        logger.warning("Task failed: %s", task)
        # A task that never called back may have no connection to close.
        if task.connection:
            task.connection.close()
        driver.reviveOffers()
def slaveLost(self, driver, slaveId):
    """Report a lost slave through ``_raise`` once the cluster has started.

    :param driver: scheduler driver (not used here).
    :param slaveId: object exposing ``value``.
    """
    if not self.started:
        return
    logger.error("Slave %s lost:" % slaveId.value)
    failure = RuntimeError('Slave %s lost' % slaveId)
    _raise(failure)
def executorLost(self, driver, executorId, slaveId, status):
    """Report a lost executor through ``_raise`` once the cluster has started.

    :param driver: scheduler driver (not used here).
    :param executorId: object exposing ``value``.
    :param slaveId: id of the slave the executor ran on.
    :param status: exit status reported for the executor (not used here).
    """
    if self.started:
        logger.error("Executor %s lost:", executorId.value)
        # The format string had been mangled to '%[email protected]%s' by
        # e-mail obfuscation, which raises ValueError when formatted; the
        # intended form is "<executor>@<slave>".
        _raise(RuntimeError('Executor %s@%s lost' % (executorId, slaveId)))
def error(self, driver, message):
    """Log an error reported by the driver and pass it to ``_raise``.

    :param driver: scheduler driver (not used here).
    :param message: error text; concatenated into the exception message.
    """
    logger.error("Mesos error: %s" % message)
    failure = RuntimeError('Error ' + message)
    _raise(failure)
def stop(self):
    """Close task connections and stop the driver, if they exist.

    Both ``self.tasks`` and ``self.driver`` are deleted from the instance
    after being handled, so a second call finds nothing to do.
    """
    logger.debug("exit")

    if hasattr(self, "tasks"):
        for task in self.tasks:
            # ``start()`` calls ``stop()`` from its error path, where some
            # tasks may not have a connection yet; skip those instead of
            # failing and masking the original exception.
            if task.connection:
                task.connection.close()
        del self.tasks

    if hasattr(self, "driver"):
        self.driver.stop()
        del self.driver