当前位置: 首页>>代码示例>>Python>>正文


Python ckpt.CheckpointDispatcher类代码示例

本文整理汇总了Python中apache.thermos.common.ckpt.CheckpointDispatcher的典型用法代码示例。如果您正苦于以下问题:Python CheckpointDispatcher类的具体用法?Python CheckpointDispatcher怎么用?Python CheckpointDispatcher使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CheckpointDispatcher类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: read

def read(args, options):
  """Replay a thermos checkpoint.

  Usage: thermos read [options] checkpoint_filename
  Options:
    --simple	Do not replay the full task state machine.  Only print out the contents of
                each checkpoint log message.
  """
  if len(args) != 1:
    app.error('Expected one checkpoint file, got %s' % len(args))
  if not os.path.exists(args[0]):
    app.error('Could not find %s' % args[0])

  dispatcher = CheckpointDispatcher()
  state = RunnerState(processes={})
  with open(args[0], 'r') as fp:
    try:
      for record in ThriftRecordReader(fp, RunnerCkpt):
        if not options.simple:
          dispatcher.dispatch(state, record)
        else:
          print('CKPT: %s' % record)
    except RecordIO.Error as err:
      print("Failed to recover from %s: %s" % (fp.name, err))
      return

  if not options.simple:
    if state is None or state.header is None:
      print('Checkpoint stream CORRUPT or outdated format')
      return
    print('Recovered Task Header:')
    print('  id:      %s' % state.header.task_id)
    print('  user:    %s' % state.header.user)
    print('  host:    %s' % state.header.hostname)
    print('  sandbox: %s' % state.header.sandbox)
    if state.header.ports:
      print('  ports:   %s' % ' '.join(
        '%s->%s' % (name, port) for (name, port) in state.header.ports.items()))
    print('Recovered Task States:')
    for task_status in state.statuses:
      print('  %s [pid: %d] => %s' % (
        time.asctime(time.localtime(task_status.timestamp_ms / 1000.0)),
        task_status.runner_pid,
        TaskState._VALUES_TO_NAMES[task_status.state]))
    print('Recovered Processes:')
    for process, process_history in state.processes.items():
      print('  %s   runs: %s' % (process, len(process_history)))
      for k in reversed(range(len(process_history))):
        run = process_history[k]
        print('    %2d: pid=%d, rc=%s, finish:%s, state:%s' % (
          k,
          run.pid,
          run.return_code if run.return_code is not None else '',
          time.asctime(time.localtime(run.stop_time)) if run.stop_time else 'None',
          ProcessState._VALUES_TO_NAMES.get(run.state, 'Unknown')))
开发者ID:dominichamon,项目名称:incubator-aurora,代码行数:55,代码来源:thermos.py

示例2: tail

def tail(args, options):
    """Tail the logs of a task process.

    Usage: thermos tail task_name [process_name]
  """
    if len(args) == 0:
        app.error("Expected a task to tail, got nothing!")
    if len(args) not in (1, 2):
        app.error("Expected at most two arguments (task and optional process), got %d" % len(args))

    task_id = args[0]
    detector = TaskDetector(root=options.root)
    checkpoint = CheckpointDispatcher.from_file(detector.get_checkpoint(task_id))
    log_dir = checkpoint.header.log_dir
    process_runs = [(process, run) for (process, run) in detector.get_process_runs(task_id, log_dir)]
    if len(args) == 2:
        process_runs = [(process, run) for (process, run) in process_runs if process == args[1]]

    if len(process_runs) == 0:
        print("ERROR: No processes found.", file=sys.stderr)
        sys.exit(1)

    processes = set([process for process, _ in process_runs])
    if len(processes) != 1:
        print("ERROR: More than one process matches query.", file=sys.stderr)
        sys.exit(1)

    process = processes.pop()
    run = max([run for _, run in process_runs])

    logdir = TaskPath(root=options.root, task_id=args[0], process=process, run=run, log_dir=log_dir).getpath(
        "process_logdir"
    )
    logfile = os.path.join(logdir, "stderr" if options.use_stderr else "stdout")

    monitor = TaskMonitor(TaskPath(root=options.root), args[0])

    def log_is_active():
        active_processes = monitor.get_active_processes()
        for process_status, process_run in active_processes:
            if process_status.process == process and process_run == run:
                return True
        return False

    if not log_is_active():
        print("Tail of terminal log %s" % logfile)
        for line in tail_closed(logfile):
            print(line.rstrip())
        return

    now = time.time()
    next_check = now + 5.0
    print("Tail of active log %s" % logfile)
    for line in tail_f(logfile, include_last=True, forever=False):
        print(line.rstrip())
        if time.time() > next_check:
            if not log_is_active():
                break
            else:
                next_check = time.time() + 5.0
开发者ID:sumanau7,项目名称:incubator-aurora,代码行数:60,代码来源:thermos.py

示例3: get

 def get(cls, task_id, checkpoint_root):
     """
   Get a TaskRunner bound to the task_id in checkpoint_root.
 """
     path = TaskPath(root=checkpoint_root, task_id=task_id, state="active")
     task_json = path.getpath("task_path")
     task_checkpoint = path.getpath("runner_checkpoint")
     if not os.path.exists(task_json):
         return None
     task = ThermosConfigLoader.load_json(task_json)
     if task is None:
         return None
     if len(task.tasks()) == 0:
         return None
     try:
         checkpoint = CheckpointDispatcher.from_file(task_checkpoint)
         if checkpoint is None or checkpoint.header is None:
             return None
         return cls(
             task.tasks()[0].task(),
             checkpoint_root,
             checkpoint.header.sandbox,
             log_dir=checkpoint.header.log_dir,
             task_id=task_id,
             portmap=checkpoint.header.ports,
             hostname=checkpoint.header.hostname,
         )
     except Exception as e:
         log.error("Failed to reconstitute checkpoint in TaskRunner.get: %s" % e, exc_info=True)
         return None
开发者ID:StephanErb,项目名称:aurora,代码行数:30,代码来源:runner.py

示例4: get_states

 def get_states(self, task_id):
   """Returns the (timestamp, status) tuples of the task or [] if could not replay."""
   statuses = CheckpointDispatcher.iter_statuses(self._runner_ckpt(task_id))
   try:
     return [(state.timestamp_ms / 1000.0, state.state) for state in statuses]
   except CheckpointDispatcher.ErrorRecoveringState:
     return []
开发者ID:josephglanville,项目名称:incubator-aurora,代码行数:7,代码来源:gc_executor.py

示例5: get_sandbox

 def get_sandbox(self, task_id):
   """Returns the sandbox of the task, or None if it has not yet been initialized."""
   try:
     for update in CheckpointDispatcher.iter_updates(self._runner_ckpt(task_id)):
       if update.runner_header and update.runner_header.sandbox:
         return update.runner_header.sandbox
   except CheckpointDispatcher.ErrorRecoveringState:
     return None
开发者ID:josephglanville,项目名称:incubator-aurora,代码行数:8,代码来源:gc_executor.py

示例6: run

  def run(self):
    self._run_count += 1
    atexit.register(self.cleanup)

    if self.script_filename:
      os.unlink(self.script_filename)

    with temporary_file(cleanup=False) as fp:
      self.script_filename = fp.name
      fp.write(self.RUN_JOB_SCRIPT % {
        'filename': self.job_filename,
        'sandbox': self.sandbox,
        'root': self.tempdir,
        'task_id': self.task_id,
        'state_filename': self.state_filename,
        'success_rate': self.success_rate,
        'random_seed': self.random_seed + self._run_count,
        'extra_task_runner_args': self.extra_task_runner_args,
      })

    with environment_as(PYTHONPATH=os.pathsep.join(sys.path)):
      self.po = subprocess.Popen([sys.executable, self.script_filename],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      try:
        so, se = self.po.communicate()
      except OSError as e:
        if e.errno == errno.ECHILD:
          so = se = 'Killed'
        else:
          raise

    rc = self.po.returncode
    if rc != 0:
      if os.path.exists(self.job_filename):
        with open(self.job_filename) as fp:
          config = fp.read()
      else:
        config = 'Nonexistent!'
      if 'THERMOS_DEBUG' in os.environ:
        print("Runner failed!\n\n\nconfig:%s\n\n\nstdout:%s\n\n\nstderr:%s\n\n\n" % (
            config, so, se))

    try:
      with open(self.state_filename, 'r') as fp:
        self.state = thrift_deserialize(RunnerState(), fp.read())
    except Exception as e:
      if 'THERMOS_DEBUG' in os.environ:
        print('Failed to load Runner state: %s' % e, file=sys.stderr)
      self.state = RunnerState()

    try:
      self.reconstructed_state = CheckpointDispatcher.from_file(
          self.pathspec.getpath('runner_checkpoint'))
    except Exception as e:
      print('Failed to replay checkpoint: %s' % e, file=sys.stderr)
      self.reconstructed_state = None
    self.initialized = True
    return rc
开发者ID:AltanAlpay,项目名称:aurora,代码行数:58,代码来源:runner.py

示例7: __init__

 def __init__(self, pathspec, task_id):
   self._task_id = task_id
   self._dispatcher = CheckpointDispatcher()
   self._runnerstate = RunnerState(processes={})
   self._runner_ckpt = pathspec.given(task_id=task_id).getpath('runner_checkpoint')
   self._active_file, self._finished_file = (
       pathspec.given(task_id=task_id, state=state).getpath('task_path')
       for state in ('active', 'finished'))
   self._ckpt_head = 0
   self._apply_states()
   self._lock = threading.Lock()
开发者ID:betepahos,项目名称:incubator-aurora,代码行数:11,代码来源:monitor.py

示例8: kill

  def kill(cls, task_id, checkpoint_root, force=False,
           terminal_status=TaskState.KILLED, clock=time):
    """
      An implementation of Task killing that doesn't require a fully hydrated TaskRunner object.
      Terminal status must be either KILLED or LOST state.
    """
    if terminal_status not in (TaskState.KILLED, TaskState.LOST):
      raise cls.Error('terminal_status must be KILLED or LOST (got %s)' %
                      TaskState._VALUES_TO_NAMES.get(terminal_status) or terminal_status)
    pathspec = TaskPath(root=checkpoint_root, task_id=task_id)
    checkpoint = pathspec.getpath('runner_checkpoint')
    state = CheckpointDispatcher.from_file(checkpoint)

    if state is None or state.header is None or state.statuses is None:
      if force:
        log.error('Task has uninitialized TaskState - forcibly finalizing')
        cls.finalize_task(pathspec)
        return
      else:
        log.error('Cannot update states in uninitialized TaskState!')
        return

    ckpt = cls.open_checkpoint(checkpoint, force=force, state=state)

    def write_task_state(state):
      update = TaskStatus(state=state, timestamp_ms=int(clock.time() * 1000),
                          runner_pid=os.getpid(), runner_uid=os.getuid())
      ckpt.write(RunnerCkpt(task_status=update))

    def write_process_status(status):
      ckpt.write(RunnerCkpt(process_status=status))

    if cls.is_task_terminal(state.statuses[-1].state):
      log.info('Task is already in terminal state!  Finalizing.')
      cls.finalize_task(pathspec)
      return

    with closing(ckpt):
      write_task_state(TaskState.ACTIVE)
      for process, history in state.processes.items():
        process_status = history[-1]
        if not cls.is_process_terminal(process_status.state):
          if cls.kill_process(state, process):
            write_process_status(ProcessStatus(process=process,
              state=ProcessState.KILLED, seq=process_status.seq + 1, return_code=-9,
              stop_time=clock.time()))
          else:
            if process_status.state is not ProcessState.WAITING:
              write_process_status(ProcessStatus(process=process,
                state=ProcessState.LOST, seq=process_status.seq + 1))
      write_task_state(terminal_status)
    cls.finalize_task(pathspec)
开发者ID:betepahos,项目名称:incubator-aurora,代码行数:52,代码来源:helper.py

示例9: main

def main(args):
  values = app.get_options()

  if len(args) > 0:
    print("ERROR: unrecognized arguments: %s\n" % (" ".join(args)), file=sys.stderr)
    app.help()
    sys.exit(1)

  if not values.ckpt:
    print("ERROR: must supply --checkpoint", file=sys.stderr)
    app.help()
    sys.exit(1)

  fp = file(values.ckpt, "r")
  rr = ThriftRecordReader(fp, RunnerCkpt)
  wrs = RunnerState(processes={})
  dispatcher = CheckpointDispatcher()
  try:
    for wts in rr:
      print('Recovering: %s' % wts)
      if values.assemble is True:
        dispatcher.dispatch(wrs, wts)
  except RecordIO.Error as err:
    print('Error recovering checkpoint stream: %s' % err, file=sys.stderr)
    return
  print('\n\n\n')
  if values.assemble:
    print('Recovered Task Header')
    pprint.pprint(wrs.header, indent=4)

    print('\nRecovered Task States')
    for task_status in wrs.statuses:
      print('  %s [pid: %d] => %s' % (
          time.asctime(time.localtime(task_status.timestamp_ms / 1000.0)),
          task_status.runner_pid,
          TaskState._VALUES_TO_NAMES[task_status.state]))

    print('\nRecovered Processes')
    pprint.pprint(wrs.processes, indent=4)
开发者ID:KancerEzeroglu,项目名称:aurora,代码行数:39,代码来源:thermos_ckpt.py

示例10: __init__

    def __init__(self, root, task_id):
        """Construct a TaskMonitor.

    :param root: The checkpoint root of the task.
    :param task_id: The task id of the task.
    """
        pathspec = TaskPath(root=root, task_id=task_id)
        self._dispatcher = CheckpointDispatcher()
        self._runnerstate = RunnerState(processes={})
        self._runner_ckpt = pathspec.getpath("runner_checkpoint")
        self._active_file, self._finished_file = (
            pathspec.given(state=state).getpath("task_path") for state in ("active", "finished")
        )
        self._ckpt_head = 0
        self._apply_states()
        self._lock = threading.Lock()
开发者ID:rowoot,项目名称:aurora,代码行数:16,代码来源:monitor.py

示例11: format_task

 def format_task(task_id):
     checkpoint_filename = detector.get_checkpoint(task_id)
     checkpoint_stat = os.stat(checkpoint_filename)
     try:
         checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
     except:
         checkpoint_owner = "uid:%s" % checkpoint_stat.st_uid
     print("  %-20s [owner: %8s]" % (task_id, checkpoint_owner), end="")
     if options.verbose == 0:
         print()
     if options.verbose > 0:
         state = CheckpointDispatcher.from_file(checkpoint_filename)
         if state is None or state.header is None:
             print(" - checkpoint stream CORRUPT or outdated format")
             return
         print("  state: %8s" % TaskState._VALUES_TO_NAMES.get(state.statuses[-1].state, "Unknown"), end="")
         print(" start: %25s" % time.asctime(time.localtime(state.header.launch_time_ms / 1000.0)))
     if options.verbose > 1:
         print("    user: %s" % state.header.user, end="")
         if state.header.ports:
             print(" ports: %s" % " ".join("%s -> %s" % (key, val) for key, val in state.header.ports.items()))
         else:
             print(" ports: None")
         print("    sandbox: %s" % state.header.sandbox)
     if options.verbose > 2:
         print("    process table:")
         for process, process_history in state.processes.items():
             print("      - %s runs: %s" % (process, len(process_history)), end="")
             last_run = process_history[-1]
             print(
                 " last: pid=%s, rc=%s, finish:%s, state:%s"
                 % (
                     last_run.pid or "None",
                     last_run.return_code if last_run.return_code is not None else "",
                     time.asctime(time.localtime(last_run.stop_time)) if last_run.stop_time else "None",
                     ProcessState._VALUES_TO_NAMES.get(last_run.state, "Unknown"),
                 )
             )
         print()
开发者ID:sumanau7,项目名称:incubator-aurora,代码行数:39,代码来源:thermos.py

示例12: format_task

 def format_task(task_id):
   checkpoint_filename = detector.get_checkpoint(task_id)
   checkpoint_stat = os.stat(checkpoint_filename)
   try:
     checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
   except KeyError:
     checkpoint_owner = 'uid:%s' % checkpoint_stat.st_uid
   print('  %-20s [owner: %8s]' % (task_id, checkpoint_owner), end='')
   if options.verbose == 0:
     print()
   if options.verbose > 0:
     state = CheckpointDispatcher.from_file(checkpoint_filename)
     if state is None or state.header is None:
       print(' - checkpoint stream CORRUPT or outdated format')
       return
     print('  state: %8s' % TaskState._VALUES_TO_NAMES.get(state.statuses[-1].state, 'Unknown'),
       end='')
     print(' start: %25s' % time.asctime(time.localtime(state.header.launch_time_ms / 1000.0)))
   if options.verbose > 1:
     print('    user: %s' % state.header.user, end='')
     if state.header.ports:
       print(' ports: %s' % ' '.join('%s -> %s' % (key, val)
                                        for key, val in state.header.ports.items()))
     else:
       print(' ports: None')
     print('    sandbox: %s' % state.header.sandbox)
   if options.verbose > 2:
     print('    process table:')
     for process, process_history in state.processes.items():
       print('      - %s runs: %s' % (process, len(process_history)), end='')
       last_run = process_history[-1]
       print(' last: pid=%s, rc=%s, finish:%s, state:%s' % (
         last_run.pid or 'None',
         last_run.return_code if last_run.return_code is not None else '',
         time.asctime(time.localtime(last_run.stop_time)) if last_run.stop_time else 'None',
         ProcessState._VALUES_TO_NAMES.get(last_run.state, 'Unknown')))
     print()
开发者ID:dominichamon,项目名称:incubator-aurora,代码行数:37,代码来源:thermos.py

示例13: open_checkpoint

 def open_checkpoint(cls, filename, force=False, state=None):
   """
     Acquire a locked checkpoint stream.
   """
   safe_mkdir(os.path.dirname(filename))
   fp = lock_file(filename, "a+")
   if fp in (None, False):
     if force:
       log.info('Found existing runner, forcing leadership forfeit.')
       state = state or CheckpointDispatcher.from_file(filename)
       if cls.kill_runner(state):
         log.info('Successfully killed leader.')
         # TODO(wickman)  Blocking may not be the best idea here.  Perhaps block up to
         # a maximum timeout.  But blocking is necessary because os.kill does not immediately
         # release the lock if we're in force mode.
         fp = lock_file(filename, "a+", blocking=True)
     else:
       log.error('Found existing runner, cannot take control.')
   if fp in (None, False):
     raise cls.PermissionError('Could not open locked checkpoint: %s, lock_file = %s' %
       (filename, fp))
   ckpt = ThriftRecordWriter(fp)
   ckpt.set_sync(True)
   return ckpt
开发者ID:betepahos,项目名称:incubator-aurora,代码行数:24,代码来源:helper.py

示例14: __init__


#.........这里部分代码省略.........
                          disabled for this task.]

      optional:
        log_dir (string)  = directory to house stdout/stderr logs. If not specified, logs will be
                            written into the sandbox directory under .logs/
        task_id (string)  = bind to this task id.  if not specified, will synthesize an id based
                            upon task.name()
        portmap (dict)    = a map (string => integer) from name to port, e.g. { 'http': 80 }
        user (string)     = the user to run the task as.  if not current user, requires setuid
                            privileges.
        chroot (boolean)  = whether or not to chroot into the sandbox prior to exec.
        clock (time interface) = the clock to use throughout
        universal_handler = checkpoint record handler (only used for testing)
        planner_class (TaskPlanner class) = TaskPlanner class to use for constructing the task
                            planning policy.
        process_logger_destination (string) = The destination of logger to use for all processes.
        process_logger_mode (string) = The mode of logger to use for all processes.
        rotate_log_size_mb (integer) = The maximum size of the rotated stdout/stderr logs in MiB.
        rotate_log_backups (integer) = The maximum number of rotated stdout/stderr log backups.
        preserve_env (boolean) = whether or not env variables for the runner should be in the
                                 env for the task being run
        mesos_containerizer_path = the path to the mesos-containerizer executable that will be used
                                   to isolate the task's filesystem (if using a filesystem image).
        container_sandbox = the path within the isolated filesystem where the task's sandbox is
                            mounted.
    """
    if not issubclass(planner_class, TaskPlanner):
      raise TypeError('planner_class must be a TaskPlanner.')
    self._clock = clock
    launch_time = self._clock.time()
    launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * (10 ** 6))
    if not task_id:
      self._task_id = '%s-%s.%s' % (task.name(),
                                    time.strftime('%Y%m%d-%H%M%S', time.localtime(launch_time)),
                                    launch_time_ms)
    else:
      self._task_id = task_id
    current_user = TaskRunnerHelper.get_actual_user()
    self._user = user or current_user
    # TODO(wickman) This should be delegated to the ProcessPlatform / Helper
    if self._user != current_user:
      if os.geteuid() != 0:
        raise ValueError('task specifies user as %s, but %s does not have setuid permission!' % (
          self._user, current_user))
    self._portmap = portmap or {}
    self._launch_time = launch_time
    self._log_dir = log_dir or os.path.join(sandbox, '.logs')
    self._process_logger_destination = process_logger_destination
    self._process_logger_mode = process_logger_mode
    self._rotate_log_size_mb = rotate_log_size_mb
    self._rotate_log_backups = rotate_log_backups
    self._pathspec = TaskPath(root=checkpoint_root, task_id=self._task_id, log_dir=self._log_dir)
    self._hostname = hostname or socket.gethostname()
    try:
      ThermosTaskValidator.assert_valid_task(task)
      ThermosTaskValidator.assert_valid_ports(task, self._portmap)
    except ThermosTaskValidator.InvalidTaskError as e:
      raise self.InvalidTask('Invalid task: %s' % e)
    context = ThermosContext(
        task_id=self._task_id,
        ports=self._portmap,
        user=self._user)
    self._task, uninterp = (task % Environment(thermos=context)).interpolate()
    if len(uninterp) > 0:
      raise self.InvalidTask('Failed to interpolate task, missing: %s' %
          ', '.join(str(ref) for ref in uninterp))
    try:
      ThermosTaskValidator.assert_same_task(self._pathspec, self._task)
    except ThermosTaskValidator.InvalidTaskError as e:
      raise self.InvalidTask('Invalid task: %s' % e)
    self._plan = None  # plan currently being executed (updated by Handlers)
    self._regular_plan = planner_class(self._task, clock=clock,
        process_filter=lambda proc: proc.final().get() is False)
    self._finalizing_plan = planner_class(self._task, clock=clock,
        process_filter=lambda proc: proc.final().get() is True)
    self._chroot = chroot
    self._sandbox = sandbox
    self._container_sandbox = container_sandbox
    self._terminal_state = None
    self._ckpt = None
    self._process_map = dict((p.name().get(), p) for p in self._task.processes())
    self._task_processes = {}
    self._stages = dict((state, stage(self)) for state, stage in self.STAGES.items())
    self._finalization_start = None
    self._preemption_deadline = None
    self._watcher = ProcessMuxer(self._pathspec)
    self._state = RunnerState(processes={})
    self._preserve_env = preserve_env
    self._mesos_containerizer_path = mesos_containerizer_path

    # create runner state
    universal_handler = universal_handler or TaskRunnerUniversalHandler
    self._dispatcher = CheckpointDispatcher()
    self._dispatcher.register_handler(universal_handler(self))
    self._dispatcher.register_handler(TaskRunnerProcessHandler(self))
    self._dispatcher.register_handler(TaskRunnerTaskHandler(self))

    # recover checkpointed runner state and update plan
    self._recovery = True
    self._replay_runner_ckpt()
开发者ID:apache,项目名称:aurora,代码行数:101,代码来源:runner.py

示例15: state

 def state(self):
   """Return final state of Task (RunnerState, read from disk and cached for future access)"""
   if self._state is None:
     path = self._pathspec.getpath('runner_checkpoint')
     self._state = CheckpointDispatcher.from_file(path)
   return copy.deepcopy(self._state) if self._state else RunnerState(processes={})
开发者ID:AltanAlpay,项目名称:aurora,代码行数:6,代码来源:observed_task.py


注:本文中的apache.thermos.common.ckpt.CheckpointDispatcher类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。