本文整理汇总了 C++ 中 ORTE_ACTIVATE_JOB_STATE 函数(实际上是 Open MPI ORTE 层中的一个宏,下列示例均为 C 代码)的典型用法代码示例。如果您正苦于以下问题:C++ ORTE_ACTIVATE_JOB_STATE 函数的具体用法?C++ ORTE_ACTIVATE_JOB_STATE 怎么用?C++ ORTE_ACTIVATE_JOB_STATE 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 ORTE_ACTIVATE_JOB_STATE 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: plm_alps_launch_job
/* When working in this function, ALWAYS jump to "cleanup" if
* you encounter an error so that orterun will be woken up and
* the job can cleanly terminate
*/
static int plm_alps_launch_job(orte_job_t *jdata)
{
orte_app_context_t *app;
for (int i = 0 ; i < jdata->apps->size ; ++i) {
int env_count;
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
continue;
}
for (env_count = 0 ; app->env && app->env[env_count] ; ++env_count);
/* disable PMI for the application. this will prevent the pmi library from printing useless warnings */
opal_argv_append (&env_count, &app->env, "PMI_NO_FORK=1");
opal_argv_append (&env_count, &app->env, "PMI_NO_PREINITIALIZE=1");
}
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
/* this is a restart situation - skip to the mapping stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
} else {
/* new job - set it up */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_INIT);
}
return ORTE_SUCCESS;
}
示例2: alps_wait_cb
static void alps_wait_cb(orte_proc_t *proc, void* cbdata){
orte_job_t *jdata;
/* According to the ALPS folks, alps always returns the highest exit
code of our remote processes. Thus, a non-zero exit status doesn't
necessarily mean that alps failed - it could be that an orted returned
a non-zero exit status. Of course, that means the orted failed(!), so
the end result is the same - the job didn't start.
As a result, we really can't do much with the exit status itself - it
could be something in errno (if alps itself failed), or it could be
something returned by an orted, or it could be something returned by
the OS (e.g., couldn't find the orted binary). Somebody is welcome
to sort out all the options and pretty-print a better error message. For
now, though, the only thing that really matters is that
alps failed. Report the error and make sure that orterun
wakes up - otherwise, do nothing!
*/
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (0 != proc->exit_code) {
if (failed_launch) {
/* report that the daemon has failed so we break out of the daemon
* callback receive and exit
*/
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START);
} else {
/* an orted must have died unexpectedly after launch - report
* that the daemon has failed so we exit
*/
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED);
}
}
}
示例3: plm_alps_launch_job
/* When working in this function, ALWAYS jump to "cleanup" if
* you encounter an error so that orterun will be woken up and
* the job can cleanly terminate
*/
static int plm_alps_launch_job(orte_job_t *jdata)
{
if (ORTE_JOB_CONTROL_RESTART & jdata->controls) {
/* this is a restart situation - skip to the mapping stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
} else {
/* new job - set it up */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_INIT);
}
return ORTE_SUCCESS;
}
示例4: plm_slurm_launch_job
/* When working in this function, ALWAYS jump to "cleanup" if
* you encounter an error so that orterun will be woken up and
* the job can cleanly terminate
*/
static int plm_slurm_launch_job(orte_job_t *jdata)
{
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
/* this is a restart situation - skip to the mapping stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
} else {
/* new job - set it up */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_INIT);
}
return ORTE_SUCCESS;
}
示例5: orte_rml_oob_ft_event
int
orte_rml_oob_ft_event(int state) {
int exit_status = ORTE_SUCCESS;
int ret;
if(OPAL_CRS_CHECKPOINT == state) {
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CHECKPOINT);
}
else if(OPAL_CRS_CONTINUE == state) {
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CONTINUE);
}
else if(OPAL_CRS_RESTART == state) {
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_RESTART);
}
else if(OPAL_CRS_TERM == state ) {
;
}
else {
;
}
if(OPAL_CRS_CHECKPOINT == state) {
;
}
else if(OPAL_CRS_CONTINUE == state) {
;
}
else if(OPAL_CRS_RESTART == state) {
(void) mca_base_framework_close(&orte_oob_base_framework);
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
if( ORTE_SUCCESS != (ret = orte_oob_base_select())) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
}
else if(OPAL_CRS_TERM == state ) {
;
}
else {
;
}
cleanup:
return exit_status;
}
示例6: plm_alps_terminate_orteds
/**
* Terminate the orteds for a given job
*/
static int plm_alps_terminate_orteds(void)
{
int rc;
orte_job_t *jdata;
OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
"%s plm:alps: terminating orteds",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* deregister the waitpid callback to ensure we don't make it look like
* alps failed when it didn't. Since the alps may have already completed,
* do NOT ERROR_LOG any return code to avoid confusing, duplicate error
* messages
*/
if (NULL != alpsrun) {
orte_wait_cb_cancel(alpsrun);
}
/* now tell them to die */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
"%s plm:alps: terminated orteds",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
return rc;
}
示例7: poll_spawns
static void poll_spawns(int fd, short args, void *cbdata)
{
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
int i, rc;
bool failed_launch = true;
int local_err;
tm_event_t event;
/* TM poll for all the spawns */
for (i = 0; i < launched; ++i) {
rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
if (TM_SUCCESS != rc) {
opal_output(0, "plm:tm: failed to poll for a spawned daemon, return status = %d", rc);
goto cleanup;
}
if (TM_SUCCESS != local_err) {
opal_output(0, "plm:tm: failed to spawn daemon, error code = %d", local_err );
goto cleanup;
}
}
failed_launch = false;
cleanup:
/* cleanup */
OBJ_RELEASE(state);
/* check for failed launch - if so, force terminate */
if (failed_launch) {
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_FAILED_TO_START);
}
}
示例8: plm_slurm_terminate_orteds
/**
* Terminate the orteds for a given job
*/
static int plm_slurm_terminate_orteds(void)
{
int rc=ORTE_SUCCESS;
orte_job_t *jdata;
/* check to see if the primary pid is set. If not, this indicates
* that we never launched any additional daemons, so we cannot
* not wait for a waitpid to fire and tell us it's okay to
* exit. Instead, we simply trigger an exit for ourselves
*/
if (primary_pid_set) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}
} else {
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:slurm: primary daemons complete!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
/* need to set the #terminated value to avoid an incorrect error msg */
jdata->num_terminated = jdata->num_procs;
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
}
return rc;
}
示例9: hnp_notify
static void hnp_notify(int sd, short args, void *cbdata)
{
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_job_t *jdata = caddy->jdata;
orte_process_name_t parent, target, *npptr;
/* if they requested notification upon completion, provide it */
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION, NULL, OPAL_BOOL)) {
/* notify_completion => notify the parent of the termination
* of this child job. So get the parent jobid info */
npptr = &parent;
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&npptr, OPAL_NAME)) {
/* notify everyone who asked for it */
target.jobid = jdata->jobid;
target.vpid = ORTE_VPID_WILDCARD;
_send_notification(OPAL_ERR_JOB_TERMINATED, caddy->proc_state, &target, ORTE_NAME_WILDCARD);
} else {
target.jobid = jdata->jobid;
target.vpid = ORTE_VPID_WILDCARD;
_send_notification(OPAL_ERR_JOB_TERMINATED, caddy->proc_state, &target, &parent);
}
}
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFIED);
OBJ_RELEASE(caddy);
}
示例10: orte_routed_base_xcast_routing
void orte_routed_base_xcast_routing(orte_grpcomm_collective_t *coll,
opal_list_t *my_children)
{
opal_list_item_t *item;
orte_routed_tree_t *child;
orte_namelist_t *nm;
int i;
orte_proc_t *proc;
orte_job_t *daemons;
/* if we are the HNP and an abnormal termination is underway,
* then send it directly to everyone
*/
if (ORTE_PROC_IS_HNP) {
if (orte_abnormal_term_ordered) {
daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
for (i=1; i < daemons->procs->size; i++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, i))) {
continue;
}
/* exclude anyone known not alive */
if (proc->alive) {
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
nm->name.vpid = proc->name.vpid;
opal_list_append(&coll->targets, &nm->super);
}
}
/* if nobody is known alive, then we need to die */
if (0 == opal_list_get_size(&coll->targets)) {
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
}
} else {
/* the xcast always goes to our children */
for (item = opal_list_get_first(my_children);
item != opal_list_get_end(my_children);
item = opal_list_get_next(item)) {
child = (orte_routed_tree_t*)item;
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
nm->name.vpid = child->vpid;
opal_list_append(&coll->targets, &nm->super);
}
}
} else {
/* I am a daemon - route to my children */
for (item = opal_list_get_first(my_children);
item != opal_list_get_end(my_children);
item = opal_list_get_next(item)) {
child = (orte_routed_tree_t*)item;
nm = OBJ_NEW(orte_namelist_t);
nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
nm->name.vpid = child->vpid;
opal_list_append(&coll->targets, &nm->super);
}
}
}
示例11: shutdown_signal
static void shutdown_signal(int fd, short flags, void *arg)
{
/* trigger the call to shutdown callback to protect
* against race conditions - the trigger event will
* check the one-time lock
*/
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FORCED_EXIT);
}
示例12: allocation_complete
/* after we allocate, we need to map the processes
* so we know what nodes will be used
*/
static void allocation_complete(int fd, short args, void *cbdata)
{
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
orte_job_t *jdata;
orte_job_t *daemons;
orte_topology_t *t;
orte_node_t *node;
int i;
ORTE_ACQUIRE_OBJECT(caddy);
jdata = state->jdata;
jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
/* get the daemon job object */
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
goto done;
}
/* mark that we are not using a VM */
orte_set_attribute(&daemons->attributes, ORTE_JOB_NO_VM, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
/* ensure that all nodes point to our topology - we
* cannot support hetero nodes with this state machine
*/
t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
for (i=1; i < orte_node_pool->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
node->topology = t;
}
if (!orte_managed_allocation) {
if (NULL != orte_set_slots &&
0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) {
for (i=0; i < orte_node_pool->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:base:setting slots for node %s by %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
orte_plm_base_set_slots(node);
}
}
}
}
/* move to the map stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
done:
/* cleanup */
OBJ_RELEASE(state);
}
示例13: plm_yarn_terminate_orteds
/**
* Terminate the orteds for a given job
*/
static int plm_yarn_terminate_orteds(void) {
finish_app_master(0 == orte_exit_status);
orte_job_t* jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (ORTE_JOB_STATE_DAEMONS_TERMINATED != jdata->state) {
/* need to set the #terminated value to avoid an incorrect error msg */
jdata->num_terminated = jdata->num_procs;
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
}
return ORTE_SUCCESS;
}
示例14: launch_daemons
static void launch_daemons(int fd, short args, void *cbdata)
{
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
/* there are no daemons to launch, so just trigger the
* daemon-launch-complete state
*/
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
}
示例15: alps_wait_cb
static void alps_wait_cb(pid_t pid, int status, void* cbdata){
orte_job_t *jdata;
/* According to the ALPS folks, alps always returns the highest exit
code of our remote processes. Thus, a non-zero exit status doesn't
necessarily mean that alps failed - it could be that an orted returned
a non-zero exit status. Of course, that means the orted failed(!), so
the end result is the same - the job didn't start.
As a result, we really can't do much with the exit status itself - it
could be something in errno (if alps itself failed), or it could be
something returned by an orted, or it could be something returned by
the OS (e.g., couldn't find the orted binary). Somebody is welcome
to sort out all the options and pretty-print a better error message. For
now, though, the only thing that really matters is that
alps failed. Report the error and make sure that orterun
wakes up - otherwise, do nothing!
*/
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (0 != status) {
if (failed_launch) {
/* we have a problem during launch */
opal_output(0, "ERROR: alps failed to start the required daemons.");
opal_output(0, "ERROR: This could be due to an inability to find the orted binary (--prefix)");
opal_output(0, "ERROR: on one or more remote nodes, compilation of the orted with dynamic libraries,");
opal_output(0, "ERROR: lack of authority to execute on one or more specified nodes,");
opal_output(0, "ERROR: or the inability to write startup files into /tmp (--tmpdir/orte_tmpdir_base).");
/* report that the daemon has failed so we break out of the daemon
* callback receive and exit
*/
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START);
} else {
/* an orted must have died unexpectedly after launch - report
* that the daemon has failed so we exit
*/
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED);
}
}
}