本文整理汇总了Python中apache.thermos.common.ckpt.CheckpointDispatcher类的典型用法代码示例。如果您正苦于以下问题:Python CheckpointDispatcher类的具体用法?Python CheckpointDispatcher怎么用?Python CheckpointDispatcher使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了CheckpointDispatcher类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: read
def read(args, options):
    """Replay a thermos checkpoint.

    Usage: thermos read [options] checkpoint_filename
    Options:
      --simple  Do not replay the full task state machine.  Only print out the contents of
                each checkpoint log message.
    """
    # NOTE(review): args[0] is indexed unconditionally below, so app.error is
    # presumably expected to terminate the process -- confirm.
    if len(args) != 1:
        app.error('Expected one checkpoint file, got %s' % len(args))
    if not os.path.exists(args[0]):
        app.error('Could not find %s' % args[0])

    dispatcher = CheckpointDispatcher()
    state = RunnerState(processes={})
    # The checkpoint is a serialized record stream, so read it as bytes; text
    # mode would decode/translate the payload on Python 3 and on Windows.
    with open(args[0], 'rb') as fp:
        try:
            for record in ThriftRecordReader(fp, RunnerCkpt):
                if not options.simple:
                    dispatcher.dispatch(state, record)
                else:
                    print('CKPT: %s' % record)
        except RecordIO.Error as err:
            print("Failed to recover from %s: %s" % (fp.name, err))
            return

    if options.simple:
        # Simple mode only dumps raw records; there is no assembled state to show.
        return

    if state is None or state.header is None:
        print('Checkpoint stream CORRUPT or outdated format')
        return

    header = state.header
    print('Recovered Task Header:')
    print(' id: %s' % header.task_id)
    print(' user: %s' % header.user)
    print(' host: %s' % header.hostname)
    print(' sandbox: %s' % header.sandbox)
    if header.ports:
        print(' ports: %s' % ' '.join(
            '%s->%s' % (name, port) for (name, port) in header.ports.items()))

    print('Recovered Task States:')
    # statuses can still be None when no task status record was replayed.
    for task_status in state.statuses or []:
        print(' %s [pid: %d] => %s' % (
            time.asctime(time.localtime(task_status.timestamp_ms / 1000.0)),
            task_status.runner_pid,
            TaskState._VALUES_TO_NAMES.get(task_status.state, 'Unknown')))

    print('Recovered Processes:')
    for process, process_history in state.processes.items():
        print(' %s runs: %s' % (process, len(process_history)))
        # Most recent run first, keeping the original run index in the output.
        for k, run in reversed(list(enumerate(process_history))):
            # pid is formatted with %s because a run that never forked has no
            # pid, and '%d' % None raises TypeError.
            print(' %2d: pid=%s, rc=%s, finish:%s, state:%s' % (
                k,
                run.pid if run.pid is not None else 'None',
                run.return_code if run.return_code is not None else '',
                time.asctime(time.localtime(run.stop_time)) if run.stop_time else 'None',
                ProcessState._VALUES_TO_NAMES.get(run.state, 'Unknown')))
示例2: tail
def tail(args, options):
    """Tail the logs of a task process.

    Usage: thermos tail task_name [process_name]
    """
    argc = len(args)
    if argc == 0:
        app.error("Expected a task to tail, got nothing!")
    if argc not in (1, 2):
        app.error("Expected at most two arguments (task and optional process), got %d" % len(args))

    task_id = args[0]
    detector = TaskDetector(root=options.root)
    checkpoint = CheckpointDispatcher.from_file(detector.get_checkpoint(task_id))
    log_dir = checkpoint.header.log_dir

    # Collect every (process name, run number) pair known for the task.
    candidates = [(name, seq) for (name, seq) in detector.get_process_runs(task_id, log_dir)]
    if argc == 2:
        wanted = args[1]
        candidates = [(name, seq) for (name, seq) in candidates if name == wanted]

    if not candidates:
        print("ERROR: No processes found.", file=sys.stderr)
        sys.exit(1)

    # The query must resolve to exactly one process name.
    names = {name for name, _ in candidates}
    if len(names) != 1:
        print("ERROR: More than one process matches query.", file=sys.stderr)
        sys.exit(1)

    target_process = names.pop()
    latest_run = max(seq for _, seq in candidates)

    logdir = TaskPath(
        root=options.root,
        task_id=args[0],
        process=target_process,
        run=latest_run,
        log_dir=log_dir,
    ).getpath("process_logdir")
    stream_name = "stderr" if options.use_stderr else "stdout"
    logfile = os.path.join(logdir, stream_name)

    monitor = TaskMonitor(TaskPath(root=options.root), args[0])

    def log_is_active():
        # True while the monitor still lists the selected run as active.
        return any(
            status.process == target_process and run_number == latest_run
            for status, run_number in monitor.get_active_processes())

    if not log_is_active():
        print("Tail of terminal log %s" % logfile)
        for line in tail_closed(logfile):
            print(line.rstrip())
        return

    next_check = time.time() + 5.0
    print("Tail of active log %s" % logfile)
    for line in tail_f(logfile, include_last=True, forever=False):
        print(line.rstrip())
        if time.time() <= next_check:
            continue
        # Re-check liveness at most every five seconds; stop once the run ends.
        if not log_is_active():
            break
        next_check = time.time() + 5.0
示例3: get
def get(cls, task_id, checkpoint_root):
    """Build an instance of ``cls`` bound to ``task_id`` under ``checkpoint_root``.

    Returns None when the active task file is missing, when the task config
    cannot be loaded or contains no tasks, when the checkpoint has no header,
    or when any exception is raised while reconstituting the checkpoint (the
    exception is logged with its traceback).
    """
    active_path = TaskPath(root=checkpoint_root, task_id=task_id, state="active")
    task_json = active_path.getpath("task_path")
    task_checkpoint = active_path.getpath("runner_checkpoint")

    if not os.path.exists(task_json):
        return None

    task = ThermosConfigLoader.load_json(task_json)
    if task is None or len(task.tasks()) == 0:
        return None

    try:
        checkpoint = CheckpointDispatcher.from_file(task_checkpoint)
        if checkpoint is None or checkpoint.header is None:
            return None
        # Rebuild the runner from the first task plus the recorded header fields.
        first_task = task.tasks()[0].task()
        header = checkpoint.header
        return cls(
            first_task,
            checkpoint_root,
            header.sandbox,
            log_dir=header.log_dir,
            task_id=task_id,
            portmap=header.ports,
            hostname=header.hostname,
        )
    except Exception as e:
        log.error("Failed to reconstitute checkpoint in TaskRunner.get: %s" % e, exc_info=True)
        return None
示例4: get_states
def get_states(self, task_id):
    """Return the task's (timestamp, status) tuples, or [] if the checkpoint cannot be replayed.

    Timestamps are the recorded ``timestamp_ms`` values divided by 1000.0.
    """
    status_stream = CheckpointDispatcher.iter_statuses(self._runner_ckpt(task_id))
    history = []
    try:
        # Consuming the stream is what may raise, so the loop sits inside the try.
        for status in status_stream:
            history.append((status.timestamp_ms / 1000.0, status.state))
    except CheckpointDispatcher.ErrorRecoveringState:
        return []
    return history
示例5: get_sandbox
def get_sandbox(self, task_id):
    """Return the sandbox of the task, or None if it has not yet been initialized.

    None is also returned when replaying the checkpoint raises
    ``CheckpointDispatcher.ErrorRecoveringState``.
    """
    try:
        for update in CheckpointDispatcher.iter_updates(self._runner_ckpt(task_id)):
            header = update.runner_header
            # The first update carrying a header with a non-empty sandbox wins.
            if header and header.sandbox:
                return header.sandbox
    except CheckpointDispatcher.ErrorRecoveringState:
        return None
    return None
示例6: run
def run(self):
    """Render the runner script, execute it in a subprocess and collect the results.

    Side effects: bumps ``_run_count``, registers ``cleanup`` with atexit,
    replaces ``script_filename`` with a freshly written script, and populates
    ``po``, ``state``, ``reconstructed_state`` and ``initialized``.

    Returns the return code of the subprocess.
    """
    self._run_count += 1
    atexit.register(self.cleanup)

    # Drop the script left behind by a previous invocation before writing a new one.
    if self.script_filename:
        os.unlink(self.script_filename)

    with temporary_file(cleanup=False) as script:
        self.script_filename = script.name
        substitutions = dict(
            filename=self.job_filename,
            sandbox=self.sandbox,
            root=self.tempdir,
            task_id=self.task_id,
            state_filename=self.state_filename,
            success_rate=self.success_rate,
            random_seed=self.random_seed + self._run_count,
            extra_task_runner_args=self.extra_task_runner_args,
        )
        script.write(self.RUN_JOB_SCRIPT % substitutions)

    # The child interpreter gets the parent's sys.path via PYTHONPATH.
    with environment_as(PYTHONPATH=os.pathsep.join(sys.path)):
        self.po = subprocess.Popen(
            [sys.executable, self.script_filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        try:
            out, err = self.po.communicate()
        except OSError as e:
            # ECHILD is reported as a killed child; anything else propagates.
            if e.errno != errno.ECHILD:
                raise
            out = err = 'Killed'

    rc = self.po.returncode
    if rc != 0:
        if os.path.exists(self.job_filename):
            with open(self.job_filename) as job_fp:
                config = job_fp.read()
        else:
            config = 'Nonexistent!'
        if 'THERMOS_DEBUG' in os.environ:
            print("Runner failed!\n\n\nconfig:%s\n\n\nstdout:%s\n\n\nstderr:%s\n\n\n" % (
                config, out, err))

    # Load the state file written by the script; fall back to an empty state.
    try:
        with open(self.state_filename, 'r') as state_fp:
            self.state = thrift_deserialize(RunnerState(), state_fp.read())
    except Exception as e:
        if 'THERMOS_DEBUG' in os.environ:
            print('Failed to load Runner state: %s' % e, file=sys.stderr)
        self.state = RunnerState()

    # Independently rebuild the state by replaying the runner checkpoint.
    try:
        checkpoint_path = self.pathspec.getpath('runner_checkpoint')
        self.reconstructed_state = CheckpointDispatcher.from_file(checkpoint_path)
    except Exception as e:
        print('Failed to replay checkpoint: %s' % e, file=sys.stderr)
        self.reconstructed_state = None

    self.initialized = True
    return rc
示例7: __init__
def __init__(self, pathspec, task_id):
    """Set up checkpoint-replay state for ``task_id`` using paths derived from ``pathspec``.

    Resolves the runner checkpoint path and the active/finished task file
    paths, applies any existing checkpoint states once, then creates the lock.
    """
    self._task_id = task_id
    self._dispatcher = CheckpointDispatcher()
    self._runnerstate = RunnerState(processes={})
    self._runner_ckpt = pathspec.given(task_id=task_id).getpath('runner_checkpoint')

    # Resolve both task-file locations before publishing either attribute.
    active_file = pathspec.given(task_id=task_id, state='active').getpath('task_path')
    finished_file = pathspec.given(task_id=task_id, state='finished').getpath('task_path')
    self._active_file, self._finished_file = active_file, finished_file

    self._ckpt_head = 0
    self._apply_states()
    self._lock = threading.Lock()
示例8: kill
def kill(cls, task_id, checkpoint_root, force=False,
         terminal_status=TaskState.KILLED, clock=time):
    """
    An implementation of Task killing that doesn't require a fully hydrated TaskRunner object.
    Terminal status must be either KILLED or LOST state.

    :param task_id: id of the task whose checkpoint is updated.
    :param checkpoint_root: root directory under which the task's paths are resolved.
    :param force: passed through to open_checkpoint; also allows finalizing a task
      whose checkpoint state is uninitialized.
    :param terminal_status: final TaskState to record (KILLED or LOST).
    :param clock: object providing ``time()``, used for the recorded timestamps.
    :raises cls.Error: if terminal_status is neither KILLED nor LOST.
    """
    if terminal_status not in (TaskState.KILLED, TaskState.LOST):
        # Parenthesized on purpose: '%' binds tighter than 'or', so without the
        # parentheses the message is formatted with None and the fallback to the
        # raw value is never used.
        raise cls.Error('terminal_status must be KILLED or LOST (got %s)' %
                        (TaskState._VALUES_TO_NAMES.get(terminal_status) or terminal_status))

    pathspec = TaskPath(root=checkpoint_root, task_id=task_id)
    checkpoint = pathspec.getpath('runner_checkpoint')
    state = CheckpointDispatcher.from_file(checkpoint)

    if state is None or state.header is None or state.statuses is None:
        if force:
            log.error('Task has uninitialized TaskState - forcibly finalizing')
            cls.finalize_task(pathspec)
        else:
            log.error('Cannot update states in uninitialized TaskState!')
        return

    ckpt = cls.open_checkpoint(checkpoint, force=force, state=state)

    def write_task_state(task_state):
        update = TaskStatus(state=task_state, timestamp_ms=int(clock.time() * 1000),
                            runner_pid=os.getpid(), runner_uid=os.getuid())
        ckpt.write(RunnerCkpt(task_status=update))

    def write_process_status(status):
        ckpt.write(RunnerCkpt(process_status=status))

    # The checkpoint writer is closed on every path, including the
    # already-terminal one, which previously returned with it still open.
    with closing(ckpt):
        if cls.is_task_terminal(state.statuses[-1].state):
            log.info('Task is already in terminal state! Finalizing.')
        else:
            write_task_state(TaskState.ACTIVE)
            for process, history in state.processes.items():
                process_status = history[-1]
                if cls.is_process_terminal(process_status.state):
                    continue
                if cls.kill_process(state, process):
                    write_process_status(ProcessStatus(
                        process=process,
                        state=ProcessState.KILLED,
                        seq=process_status.seq + 1,
                        return_code=-9,
                        stop_time=clock.time()))
                elif process_status.state != ProcessState.WAITING:
                    # Compare enum values by equality, not identity.
                    write_process_status(ProcessStatus(
                        process=process,
                        state=ProcessState.LOST,
                        seq=process_status.seq + 1))
            write_task_state(terminal_status)
    cls.finalize_task(pathspec)
示例9: main
def main(args):
    """Dump the records of the checkpoint given by --checkpoint, optionally assembling state.

    Every record is printed as it is read. When the ``assemble`` option is set,
    the records are also replayed into a RunnerState whose header, task states
    and processes are printed afterwards.

    :param args: leftover positional arguments; any value is treated as an error.
    """
    values = app.get_options()

    if len(args) > 0:
        print("ERROR: unrecognized arguments: %s\n" % (" ".join(args)), file=sys.stderr)
        app.help()
        sys.exit(1)

    if not values.ckpt:
        print("ERROR: must supply --checkpoint", file=sys.stderr)
        app.help()
        sys.exit(1)

    wrs = RunnerState(processes={})
    dispatcher = CheckpointDispatcher()

    # open() replaces the Python-2-only file() builtin; the context manager
    # closes the handle on every exit path, and binary mode keeps the
    # serialized records untouched.
    with open(values.ckpt, "rb") as fp:
        try:
            for wts in ThriftRecordReader(fp, RunnerCkpt):
                print('Recovering: %s' % wts)
                # Same truthiness test as the reporting section below, so replay
                # and reporting can never disagree.
                if values.assemble:
                    dispatcher.dispatch(wrs, wts)
        except RecordIO.Error as err:
            print('Error recovering checkpoint stream: %s' % err, file=sys.stderr)
            return

    print('\n\n\n')
    if values.assemble:
        print('Recovered Task Header')
        pprint.pprint(wrs.header, indent=4)

        print('\nRecovered Task States')
        # statuses may be None when the stream held no task status records.
        for task_status in wrs.statuses or []:
            print(' %s [pid: %d] => %s' % (
                time.asctime(time.localtime(task_status.timestamp_ms / 1000.0)),
                task_status.runner_pid,
                TaskState._VALUES_TO_NAMES.get(task_status.state, 'Unknown')))

        print('\nRecovered Processes')
        pprint.pprint(wrs.processes, indent=4)
示例10: __init__
def __init__(self, root, task_id):
    """Construct a TaskMonitor.

    :param root: The checkpoint root of the task.
    :param task_id: The task id of the task.
    """
    pathspec = TaskPath(root=root, task_id=task_id)
    self._dispatcher = CheckpointDispatcher()
    self._runnerstate = RunnerState(processes={})
    self._runner_ckpt = pathspec.getpath("runner_checkpoint")

    # Resolve both task-file locations before publishing either attribute.
    active_file = pathspec.given(state="active").getpath("task_path")
    finished_file = pathspec.given(state="finished").getpath("task_path")
    self._active_file, self._finished_file = active_file, finished_file

    self._ckpt_head = 0
    self._apply_states()
    self._lock = threading.Lock()
示例11: format_task
def format_task(task_id):
    """Print a summary of ``task_id``; the amount of detail grows with ``options.verbose``.

    Reads ``detector`` and ``options`` from the enclosing scope. Verbosity 0
    prints only the id/owner line; >0 adds state and start time; >1 adds user,
    ports and sandbox; >2 adds the process table.
    """
    checkpoint_filename = detector.get_checkpoint(task_id)
    checkpoint_stat = os.stat(checkpoint_filename)
    try:
        checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
    except KeyError:
        # getpwuid raises KeyError for a uid with no passwd entry; catching only
        # that keeps KeyboardInterrupt/SystemExit and real bugs from being masked.
        checkpoint_owner = "uid:%s" % checkpoint_stat.st_uid
    print(" %-20s [owner: %8s]" % (task_id, checkpoint_owner), end="")

    if options.verbose == 0:
        print()

    if options.verbose > 0:
        state = CheckpointDispatcher.from_file(checkpoint_filename)
        if state is None or state.header is None:
            print(" - checkpoint stream CORRUPT or outdated format")
            return
        print(" state: %8s" % TaskState._VALUES_TO_NAMES.get(state.statuses[-1].state, "Unknown"),
              end="")
        print(" start: %25s" % time.asctime(time.localtime(state.header.launch_time_ms / 1000.0)))

    if options.verbose > 1:
        print(" user: %s" % state.header.user, end="")
        if state.header.ports:
            print(" ports: %s" % " ".join(
                "%s -> %s" % (key, val) for key, val in state.header.ports.items()))
        else:
            print(" ports: None")
        print(" sandbox: %s" % state.header.sandbox)

    if options.verbose > 2:
        print(" process table:")
        for process, process_history in state.processes.items():
            print(" - %s runs: %s" % (process, len(process_history)), end="")
            # Only the most recent run of each process is summarized.
            last_run = process_history[-1]
            print(
                " last: pid=%s, rc=%s, finish:%s, state:%s"
                % (
                    last_run.pid or "None",
                    last_run.return_code if last_run.return_code is not None else "",
                    time.asctime(time.localtime(last_run.stop_time)) if last_run.stop_time else "None",
                    ProcessState._VALUES_TO_NAMES.get(last_run.state, "Unknown"),
                )
            )
        print()
示例12: format_task
def format_task(task_id):
    """Print one task's summary, adding detail as ``options.verbose`` increases.

    Uses ``detector`` and ``options`` from the enclosing scope. Level 0 prints
    the id/owner line only; each higher level appends state/start time, then
    user/ports/sandbox, then the per-process table.
    """
    checkpoint_filename = detector.get_checkpoint(task_id)
    owner_uid = os.stat(checkpoint_filename).st_uid
    try:
        checkpoint_owner = pwd.getpwuid(owner_uid).pw_name
    except KeyError:
        # No passwd entry for this uid: show the numeric id instead.
        checkpoint_owner = 'uid:%s' % owner_uid
    print(' %-20s [owner: %8s]' % (task_id, checkpoint_owner), end='')

    verbosity = options.verbose
    if verbosity == 0:
        print()

    if verbosity > 0:
        state = CheckpointDispatcher.from_file(checkpoint_filename)
        if state is None or state.header is None:
            print(' - checkpoint stream CORRUPT or outdated format')
            return
        latest_status = state.statuses[-1].state
        state_name = TaskState._VALUES_TO_NAMES.get(latest_status, 'Unknown')
        print(' state: %8s' % state_name, end='')
        launched = time.localtime(state.header.launch_time_ms / 1000.0)
        print(' start: %25s' % time.asctime(launched))

    if verbosity > 1:
        print(' user: %s' % state.header.user, end='')
        if state.header.ports:
            port_pairs = ['%s -> %s' % (key, val) for key, val in state.header.ports.items()]
            print(' ports: %s' % ' '.join(port_pairs))
        else:
            print(' ports: None')
        print(' sandbox: %s' % state.header.sandbox)

    if verbosity > 2:
        print(' process table:')
        for process, process_history in state.processes.items():
            print(' - %s runs: %s' % (process, len(process_history)), end='')
            # Summarize only the most recent run of the process.
            last_run = process_history[-1]
            pid_text = last_run.pid or 'None'
            rc_text = last_run.return_code if last_run.return_code is not None else ''
            if last_run.stop_time:
                finish_text = time.asctime(time.localtime(last_run.stop_time))
            else:
                finish_text = 'None'
            run_state = ProcessState._VALUES_TO_NAMES.get(last_run.state, 'Unknown')
            print(' last: pid=%s, rc=%s, finish:%s, state:%s' % (
                pid_text, rc_text, finish_text, run_state))
        print()
示例13: open_checkpoint
def open_checkpoint(cls, filename, force=False, state=None):
    """
    Acquire a locked checkpoint stream.

    Creates the parent directory of ``filename`` if needed and tries to lock the
    file in append mode. If the lock is unavailable and ``force`` is set, the
    current runner is killed (using ``state``, or the state replayed from
    ``filename`` when none is given) and the lock is re-acquired in blocking mode.

    Returns a ThriftRecordWriter with sync enabled.
    Raises cls.PermissionError when the lock could not be obtained.
    """
    safe_mkdir(os.path.dirname(filename))
    lock_handle = lock_file(filename, "a+")

    if lock_handle in (None, False):
        if not force:
            log.error('Found existing runner, cannot take control.')
        else:
            log.info('Found existing runner, forcing leadership forfeit.')
            state = state or CheckpointDispatcher.from_file(filename)
            if cls.kill_runner(state):
                log.info('Successfully killed leader.')
                # TODO(wickman) Blocking may not be the best idea here. Perhaps block up to
                # a maximum timeout. But blocking is necessary because os.kill does not
                # immediately release the lock if we're in force mode.
                lock_handle = lock_file(filename, "a+", blocking=True)

    if lock_handle in (None, False):
        raise cls.PermissionError('Could not open locked checkpoint: %s, lock_file = %s' %
                                  (filename, lock_handle))

    writer = ThriftRecordWriter(lock_handle)
    writer.set_sync(True)
    return writer
示例14: __init__
#.........这里部分代码省略.........
disabled for this task.]
optional:
log_dir (string) = directory to house stdout/stderr logs. If not specified, logs will be
written into the sandbox directory under .logs/
task_id (string) = bind to this task id. if not specified, will synthesize an id based
upon task.name()
portmap (dict) = a map (string => integer) from name to port, e.g. { 'http': 80 }
user (string) = the user to run the task as. if not current user, requires setuid
privileges.
chroot (boolean) = whether or not to chroot into the sandbox prior to exec.
clock (time interface) = the clock to use throughout
universal_handler = checkpoint record handler (only used for testing)
planner_class (TaskPlanner class) = TaskPlanner class to use for constructing the task
planning policy.
process_logger_destination (string) = The destination of logger to use for all processes.
process_logger_mode (string) = The mode of logger to use for all processes.
rotate_log_size_mb (integer) = The maximum size of the rotated stdout/stderr logs in MiB.
rotate_log_backups (integer) = The maximum number of rotated stdout/stderr log backups.
preserve_env (boolean) = whether or not env variables for the runner should be in the
env for the task being run
mesos_containerizer_path = the path to the mesos-containerizer executable that will be used
to isolate the task's filesystem (if using a filesystem image).
container_sandbox = the path within the isolated filesystem where the task's sandbox is
mounted.
"""
if not issubclass(planner_class, TaskPlanner):
raise TypeError('planner_class must be a TaskPlanner.')
self._clock = clock
launch_time = self._clock.time()
launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * (10 ** 6))
if not task_id:
self._task_id = '%s-%s.%s' % (task.name(),
time.strftime('%Y%m%d-%H%M%S', time.localtime(launch_time)),
launch_time_ms)
else:
self._task_id = task_id
current_user = TaskRunnerHelper.get_actual_user()
self._user = user or current_user
# TODO(wickman) This should be delegated to the ProcessPlatform / Helper
if self._user != current_user:
if os.geteuid() != 0:
raise ValueError('task specifies user as %s, but %s does not have setuid permission!' % (
self._user, current_user))
self._portmap = portmap or {}
self._launch_time = launch_time
self._log_dir = log_dir or os.path.join(sandbox, '.logs')
self._process_logger_destination = process_logger_destination
self._process_logger_mode = process_logger_mode
self._rotate_log_size_mb = rotate_log_size_mb
self._rotate_log_backups = rotate_log_backups
self._pathspec = TaskPath(root=checkpoint_root, task_id=self._task_id, log_dir=self._log_dir)
self._hostname = hostname or socket.gethostname()
try:
ThermosTaskValidator.assert_valid_task(task)
ThermosTaskValidator.assert_valid_ports(task, self._portmap)
except ThermosTaskValidator.InvalidTaskError as e:
raise self.InvalidTask('Invalid task: %s' % e)
context = ThermosContext(
task_id=self._task_id,
ports=self._portmap,
user=self._user)
self._task, uninterp = (task % Environment(thermos=context)).interpolate()
if len(uninterp) > 0:
raise self.InvalidTask('Failed to interpolate task, missing: %s' %
', '.join(str(ref) for ref in uninterp))
try:
ThermosTaskValidator.assert_same_task(self._pathspec, self._task)
except ThermosTaskValidator.InvalidTaskError as e:
raise self.InvalidTask('Invalid task: %s' % e)
self._plan = None # plan currently being executed (updated by Handlers)
self._regular_plan = planner_class(self._task, clock=clock,
process_filter=lambda proc: proc.final().get() is False)
self._finalizing_plan = planner_class(self._task, clock=clock,
process_filter=lambda proc: proc.final().get() is True)
self._chroot = chroot
self._sandbox = sandbox
self._container_sandbox = container_sandbox
self._terminal_state = None
self._ckpt = None
self._process_map = dict((p.name().get(), p) for p in self._task.processes())
self._task_processes = {}
self._stages = dict((state, stage(self)) for state, stage in self.STAGES.items())
self._finalization_start = None
self._preemption_deadline = None
self._watcher = ProcessMuxer(self._pathspec)
self._state = RunnerState(processes={})
self._preserve_env = preserve_env
self._mesos_containerizer_path = mesos_containerizer_path
# create runner state
universal_handler = universal_handler or TaskRunnerUniversalHandler
self._dispatcher = CheckpointDispatcher()
self._dispatcher.register_handler(universal_handler(self))
self._dispatcher.register_handler(TaskRunnerProcessHandler(self))
self._dispatcher.register_handler(TaskRunnerTaskHandler(self))
# recover checkpointed runner state and update plan
self._recovery = True
self._replay_runner_ckpt()
示例15: state
def state(self):
    """Return final state of Task (RunnerState, read from disk and cached for future access).

    Callers get a deep copy of the cached state, or a fresh empty RunnerState
    when the cached value is falsy.
    """
    if self._state is None:
        # First access: replay the runner checkpoint and cache the result.
        checkpoint_path = self._pathspec.getpath('runner_checkpoint')
        self._state = CheckpointDispatcher.from_file(checkpoint_path)
    if not self._state:
        return RunnerState(processes={})
    return copy.deepcopy(self._state)