This article collects typical usage examples of the OPAL_OUTPUT_VERBOSE function in C++. If you have been wondering what exactly OPAL_OUTPUT_VERBOSE does, how to call it, or what real uses of it look like, the hand-picked code examples below should help.
A total of 15 code examples of the OPAL_OUTPUT_VERBOSE function are shown below, sorted by popularity by default.
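All of the examples share one calling convention: OPAL_OUTPUT_VERBOSE is a macro whose entire argument list is wrapped in an extra pair of parentheses, with a verbosity level first, an output stream id second, and printf-style format arguments after that; in non-debug builds the macro normally compiles away entirely. The minimal sketch below illustrates that pattern outside of any Open MPI framework. The stream variable my_debug_output, the helper function log_example, and the verbosity levels used are placeholders invented for this illustration, not identifiers from the Open MPI code base.
/* Minimal sketch of the OPAL_OUTPUT_VERBOSE calling pattern; my_debug_output
 * and log_example are placeholders, not Open MPI identifiers. */
#include "opal/util/output.h"

static int my_debug_output = -1;   /* verbose stream id; -1 means "not opened yet" */

static void log_example(int nitems, const char *nodename)
{
    /* open a stream with default settings; real code usually reuses a
     * framework's stream such as orte_debug_output */
    if (my_debug_output < 0) {
        my_debug_output = opal_output_open(NULL);
        opal_output_set_verbosity(my_debug_output, 10);
    }
    /* note the double parentheses: the inner list is forwarded verbatim to
     * opal_output_verbose(level, stream, format, ...), and the message is
     * printed only if the stream's verbosity is at least 5 */
    OPAL_OUTPUT_VERBOSE((5, my_debug_output,
                         "processed %d items on node %s", nitems, nodename));
}
The extra set of parentheses is what lets the macro forward a variable-length argument list to opal_output_verbose(), which is why every call in the examples below uses the double-parenthesis form.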
Example 1: orte_util_add_hostfile_nodes
int orte_util_add_hostfile_nodes(opal_list_t *nodes,
bool *override_oversubscribed,
char *hostfile)
{
opal_list_t exclude;
opal_list_item_t *item, *itm;
int rc;
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s hostfile: checking hostfile %s for nodes",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
OBJ_CONSTRUCT(&exclude, opal_list_t);
/* parse the hostfile and add the contents to the list */
if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, false))) {
goto cleanup;
}
/* parse the nodes to check for any relative node directives */
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item)) {
orte_node_t *node=(orte_node_t*)item;
if ('+' == node->name[0]) {
orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
true, node->name);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
}
/* remove from the list of nodes those that are in the exclude list */
while(NULL != (item = opal_list_remove_first(&exclude))) {
orte_node_t *exnode = (orte_node_t*)item;
/* check for matches on nodes */
for (itm = opal_list_get_first(nodes);
itm != opal_list_get_end(nodes);
itm = opal_list_get_next(itm)) {
orte_node_t *node=(orte_node_t*)itm;
if (0 == strcmp(exnode->name, node->name)) {
/* match - remove it */
opal_list_remove_item(nodes, itm);
OBJ_RELEASE(itm);
break;
}
}
OBJ_RELEASE(item);
}
/* indicate that ORTE should override any oversubscribed conditions
* based on local hardware limits since the user (a) might not have
* provided us any info on the #slots for a node, and (b) the user
* might have been wrong! If we don't check the number of local physical
* processors, then we could be too aggressive on our sched_yield setting
* and cause performance problems.
*/
*override_oversubscribed = true;
cleanup:
OBJ_DESTRUCT(&exclude);
return rc;
}
Example 2: orte_rml_oob_send_nb
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
orte_process_name_t* peer,
struct iovec* iov,
int count,
orte_rml_tag_t tag,
orte_rml_callback_fn_t cbfunc,
void* cbdata)
{
orte_rml_recv_t *rcv;
orte_rml_send_t *snd;
int bytes;
orte_self_send_xfer_t *xfer;
int i;
char* ptr;
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
"%s rml_send to peer %s at tag %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(peer), tag));
if (ORTE_RML_TAG_INVALID == tag) {
/* cannot send to an invalid tag */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
if (NULL == peer ||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
/* cannot send to an invalid peer */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer)));
/* if this is a message to myself, then just post the message
* for receipt - no need to dive into the oob
*/
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) { /* local delivery */
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
"%s rml_send_iovec_to_self at tag %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
/* send to self is a tad tricky - we really don't want
* to track the send callback function throughout the recv
* process and execute it upon receipt as this would provide
* very different timing from a non-self message. Specifically,
* if we just retain a pointer to the incoming data
* and then execute the send callback prior to the receive,
* then the caller will think we are done with the data and
* can release it. So we have to copy the data in order to
* execute the send callback prior to receiving the message.
*
* In truth, this really is a better mimic of the non-self
* message behavior. If we actually pushed the message out
* on the wire and had it loop back, then we would receive
* a new block of data anyway.
*/
/* setup the send callback */
xfer = OBJ_NEW(orte_self_send_xfer_t);
xfer->iov = iov;
xfer->count = count;
xfer->cbfunc.iov = cbfunc;
xfer->tag = tag;
xfer->cbdata = cbdata;
/* setup the event for the send callback */
opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer);
opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1);
/* copy the message for the recv */
rcv = OBJ_NEW(orte_rml_recv_t);
rcv->sender = *peer;
rcv->tag = tag;
/* get the total number of bytes in the iovec array */
bytes = 0;
for (i = 0 ; i < count ; ++i) {
bytes += iov[i].iov_len;
}
/* get the required memory allocation */
if (0 < bytes) {
rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes);
rcv->iov.iov_len = bytes;
/* transfer the bytes */
ptr = (char*)rcv->iov.iov_base;
for (i = 0 ; i < count ; ++i) {
memcpy(ptr, iov[i].iov_base, iov[i].iov_len);
ptr += iov[i].iov_len;
}
}
/* post the message for receipt - since the send callback was posted
* first and has the same priority, it will execute first
*/
ORTE_RML_ACTIVATE_MESSAGE(rcv);
return ORTE_SUCCESS;
}
snd = OBJ_NEW(orte_rml_send_t);
snd->dst = *peer;
snd->origin = *ORTE_PROC_MY_NAME;
snd->tag = tag;
//......... rest of the code omitted .........
Example 3: ompi_osc_portals4_rget
int
ompi_osc_portals4_rget(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **ompi_req)
{
int ret;
ompi_osc_portals4_request_t *request;
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
ptl_handle_md_t md_h;
void *md_base;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, target, (int) target_disp,
target_count, target_dt->name,
(unsigned long) win));
OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
*ompi_req = &request->super;
offset = get_displacement(module, target) * target_disp;
if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Rget: transfer of non-contiguous memory is not currently supported.\n");
return OMPI_ERR_NOT_SUPPORTED;
} else {
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected = 1;
ret = ompi_datatype_type_size(origin_dt, &length);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
length *= origin_count;
ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
ret = PtlGet(md_h,
(ptl_size_t) ((char*) origin_addr - (char*) md_base),
length,
peer,
module->pt_idx,
module->match_bits,
offset,
request);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
}
return OMPI_SUCCESS;
}
Example 4: portals4_progress
/* Target EQ */
static int
portals4_progress(void)
{
int count = 0, ret;
ptl_event_t ev;
ompi_coll_portals4_request_t *ptl_request;
while (true) {
ret = PtlEQGet(mca_coll_portals4_component.eq_h, &ev);
if (PTL_OK == ret) {
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "event type=%s\n", evname[ev.type]));
count++;
switch (ev.type) {
case PTL_EVENT_PUT:
/* Non-Blocking / request */
if (PTL_OK == ev.ni_fail_type) {
OPAL_OUTPUT_VERBOSE((50, ompi_coll_base_framework.framework_output,
"hdr_data %p, matchbits 0x%lx",
(void*) ev.hdr_data, ev.match_bits));
assert(0 != ev.hdr_data);
ptl_request = (ompi_coll_portals4_request_t*) ev.hdr_data;
assert(NULL != ptl_request);
switch (ptl_request->type) {
case OMPI_COLL_PORTALS4_TYPE_BARRIER:
ompi_coll_portals4_ibarrier_intra_fini(ptl_request);
break;
case OMPI_COLL_PORTALS4_TYPE_BCAST:
ompi_coll_portals4_ibcast_intra_fini(ptl_request);
break;
case OMPI_COLL_PORTALS4_TYPE_REDUCE:
ompi_coll_portals4_ireduce_intra_fini(ptl_request);
break;
case OMPI_COLL_PORTALS4_TYPE_ALLREDUCE:
ompi_coll_portals4_iallreduce_intra_fini(ptl_request);
break;
case OMPI_COLL_PORTALS4_TYPE_SCATTER:
ompi_coll_portals4_iscatter_intra_fini(ptl_request);
break;
case OMPI_COLL_PORTALS4_TYPE_GATHER:
ompi_coll_portals4_igather_intra_fini(ptl_request);
break;
}
}
if (PTL_OK != ev.ni_fail_type) {
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "ni_fail_type=%s\n", failtype[ev.ni_fail_type]));
}
break;
default:
opal_output(ompi_coll_base_framework.framework_output,
"Unexpected event of type %d", ev.type);
break;
}
}
else if (PTL_EQ_EMPTY == ret) {
break;
}
else if (PTL_EQ_DROPPED == ret) {
opal_output(ompi_coll_base_framework.framework_output, "Flow control situation without recovery (EQ_DROPPED)\n");
abort();
}
else {
opal_output(ompi_coll_base_framework.framework_output, "Error returned from PtlEQGet: %d", ret);
break;
}
}
return count;
}
Example 5: segment_create
/* ////////////////////////////////////////////////////////////////////////// */
static int
segment_create(opal_shmem_ds_t *ds_buf,
const char *file_name,
size_t size)
{
int rc = OPAL_SUCCESS;
char *real_file_name = NULL;
pid_t my_pid = getpid();
bool space_available = false;
uint64_t amount_space_avail = 0;
/* the real size of the shared memory segment. this includes enough space
* to store our segment header.
*/
size_t real_size = size + sizeof(opal_shmem_seg_hdr_t);
opal_shmem_seg_hdr_t *seg_hdrp = MAP_FAILED;
/* init the contents of opal_shmem_ds_t */
shmem_ds_reset(ds_buf);
/* change the path of shmem mmap's backing store? */
if (0 != opal_shmem_mmap_relocate_backing_file) {
int err;
if (path_usable(opal_shmem_mmap_backing_file_base_dir, &err)) {
if (NULL ==
(real_file_name =
get_uniq_file_name(opal_shmem_mmap_backing_file_base_dir,
file_name))) {
/* out of resources */
return OPAL_ERROR;
}
}
/* a relocated backing store was requested, but the path specified
* cannot be used :-(. if the flag is negative, then warn and continue
* with the default path. otherwise, fail.
*/
else if (opal_shmem_mmap_relocate_backing_file < 0) {
opal_output(0, "shmem: mmap: WARNING: could not relocate "
"backing store to \"%s\" (%s). Continuing with "
"default path.\n",
opal_shmem_mmap_backing_file_base_dir, strerror(err));
}
/* must be positive, so fail */
else {
opal_output(0, "shmem: mmap: WARNING: could not relocate "
"backing store to \"%s\" (%s). Cannot continue with "
"shmem mmap.\n", opal_shmem_mmap_backing_file_base_dir,
strerror(err));
return OPAL_ERROR;
}
}
/* are we using the default path? */
if (NULL == real_file_name) {
/* use the path specified by the caller of this function */
if (NULL == (real_file_name = strdup(file_name))) {
/* out of resources */
return OPAL_ERROR;
}
}
OPAL_OUTPUT_VERBOSE(
(70, opal_shmem_base_framework.framework_output,
"%s: %s: backing store base directory: %s\n",
mca_shmem_mmap_component.super.base_version.mca_type_name,
mca_shmem_mmap_component.super.base_version.mca_component_name,
real_file_name)
);
/* determine whether the specified filename is on a network file system.
* this is an important check because if the backing store is located on
* a network filesystem, the user may see a shared memory performance hit.
*/
if (opal_shmem_mmap_nfs_warning && opal_path_nfs(real_file_name)) {
char hn[MAXHOSTNAMELEN];
gethostname(hn, MAXHOSTNAMELEN - 1);
hn[MAXHOSTNAMELEN - 1] = '\0';
opal_show_help("help-opal-shmem-mmap.txt", "mmap on nfs", 1, hn,
real_file_name);
}
/* let's make sure we have enough space for the backing file */
if (OPAL_SUCCESS != (rc = enough_space(real_file_name,
real_size,
&amount_space_avail,
&space_available))) {
opal_output(0, "shmem: mmap: an error occurred while determining "
"whether or not %s could be created.", real_file_name);
/* rc is set */
goto out;
}
if (!space_available) {
char hn[MAXHOSTNAMELEN];
gethostname(hn, MAXHOSTNAMELEN - 1);
hn[MAXHOSTNAMELEN - 1] = '\0';
rc = OPAL_ERR_OUT_OF_RESOURCE;
opal_show_help("help-opal-shmem-mmap.txt", "target full", 1,
real_file_name, hn, (unsigned long)real_size,
(unsigned long long)amount_space_avail);
goto out;
}
//......... rest of the code omitted .........
Example 6: orte_grpcomm_base_comm_start
int orte_grpcomm_base_comm_start(void)
{
int rc;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:receive start comm",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (!recv_issued) {
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_COLLECTIVE,
ORTE_RML_PERSISTENT,
daemon_local_recv, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_XCAST,
ORTE_RML_PERSISTENT,
orte_grpcomm_base_xcast_recv, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_DAEMON_COLL,
ORTE_RML_PERSISTENT,
daemon_coll_recv, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
if (ORTE_PROC_IS_DAEMON) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_ROLLUP,
ORTE_RML_PERSISTENT,
orte_grpcomm_base_rollup_recv, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
}
if (ORTE_PROC_IS_HNP) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_COLL_ID_REQ,
ORTE_RML_PERSISTENT,
coll_id_req, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
}
recv_issued = true;
} else if (ORTE_PROC_IS_APP) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_COLLECTIVE,
ORTE_RML_PERSISTENT,
app_recv, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
recv_issued = true;
}
}
return ORTE_SUCCESS;
}
Example 7: mca_btl_portals4_get
int
mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
void *local_address,
uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size,
int flags,
int order,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext,
void *cbdata)
{
mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
mca_btl_portals4_frag_t *frag = NULL;
ptl_md_t md;
int ret;
/* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
mca_btl_portals4_component_progress();
}
OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
if (NULL == frag){
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
return OPAL_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_get: Incrementing portals_outstanding_ops=%d frag=%p",
portals4_btl->portals_outstanding_ops, (void *)frag));
frag->rdma_cb.func = cbfunc;
frag->rdma_cb.context = cbcontext;
frag->rdma_cb.data = cbdata;
frag->rdma_cb.local_handle = local_handle;
frag->endpoint = btl_peer;
frag->hdr.tag = MCA_BTL_TAG_MAX;
/* NOTE: in this excerpt `ret` has not been assigned yet and `md` is never
 * initialized; in the full source this error check follows a Portals MD
 * setup step that is not shown here */
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d",
__FILE__, __LINE__, ret);
return OPAL_ERROR;
}
frag->match_bits = remote_handle->key;
frag->addr = local_address;
frag->length = size;
frag->peer_proc = btl_peer->ptl_proc;
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
ret = PtlGet(portals4_btl->send_md_h,
(ptl_size_t) local_address,
size,
btl_peer->ptl_proc,
portals4_btl->recv_idx,
frag->match_bits, /* match bits */
0,
frag);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: PtlGet failed: %d",
__FILE__, __LINE__, ret);
return OPAL_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
return OPAL_SUCCESS;
}
Example 8: orte_state_base_track_procs
void orte_state_base_track_procs(int fd, short argc, void *cbdata)
{
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_process_name_t *proc = &caddy->name;
orte_proc_state_t state = caddy->proc_state;
orte_job_t *jdata;
orte_proc_t *pdata;
int i;
opal_output_verbose(5, orte_state_base_framework.framework_output,
"%s state:base:track_procs called for proc %s state %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
orte_proc_state_to_str(state));
/* get the job object for this proc */
if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto cleanup;
}
pdata = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
if (ORTE_PROC_STATE_RUNNING == state) {
/* update the proc state */
pdata->state = state;
jdata->num_launched++;
if (jdata->num_launched == jdata->num_procs) {
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS);
} else {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_RUNNING);
}
}
} else if (ORTE_PROC_STATE_REGISTERED == state) {
/* update the proc state */
pdata->state = state;
jdata->num_reported++;
if (jdata->num_reported == jdata->num_procs) {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_REGISTERED);
}
} else if (ORTE_PROC_STATE_IOF_COMPLETE == state) {
/* update the proc state */
pdata->state = state;
/* Release only the stdin IOF file descriptor for this child, if one
* was defined. File descriptors for the other IOF channels - stdout,
* stderr, and stddiag - were released when their associated pipes
* were cleared and closed due to termination of the process
*/
if (NULL != orte_iof.close) {
orte_iof.close(proc, ORTE_IOF_STDIN);
}
ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_IOF_COMPLETE);
if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_WAITPID)) {
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
}
} else if (ORTE_PROC_STATE_WAITPID_FIRED == state) {
/* update the proc state */
pdata->state = state;
ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_WAITPID);
if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_IOF_COMPLETE)) {
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
}
} else if (ORTE_PROC_STATE_TERMINATED == state) {
/* update the proc state */
ORTE_FLAG_UNSET(pdata, ORTE_PROC_FLAG_ALIVE);
pdata->state = state;
if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_LOCAL)) {
/* Clean up the session directory as if we were the process
* itself. This covers the case where the process died abnormally
* and didn't cleanup its own session directory.
*/
orte_session_dir_finalize(proc);
}
/* if we are trying to terminate and our routes are
* gone, then terminate ourselves IF no local procs
* remain (might be some from another job)
*/
if (orte_orteds_term_ordered &&
0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) {
/* at least one is still alive */
goto cleanup;
}
}
/* call our appropriate exit procedure */
OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
"%s state:base all routes and children gone - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
goto cleanup;
}
/* return the allocated slot for reuse */
cleanup_node(pdata);
/* track job status */
jdata->num_terminated++;
if (jdata->num_terminated == jdata->num_procs) {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED);
}
//......... rest of the code omitted .........
Example 9: orte_state_base_check_all_complete
void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
{
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_job_t *jdata = caddy->jdata;
orte_proc_t *proc;
int i;
orte_std_cntr_t j;
orte_job_t *job;
orte_node_t *node;
orte_job_map_t *map;
orte_std_cntr_t index;
bool one_still_alive;
orte_vpid_t lowest=0;
int32_t i32, *i32ptr;
opal_output_verbose(2, orte_state_base_framework.framework_output,
"%s state:base:check_job_complete on job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));
if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
/* just check to see if the daemons are complete */
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:base:check_job_complete - received NULL job, checking daemons",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
goto CHECK_DAEMONS;
} else {
/* mark the job as terminated, but don't override any
* abnormal termination flags
*/
if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) {
jdata->state = ORTE_JOB_STATE_TERMINATED;
}
}
/* tell the IOF that the job is complete */
if (NULL != orte_iof.complete) {
orte_iof.complete(jdata);
}
i32ptr = &i32;
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32) && !orte_abort_non_zero_exit) {
if (!orte_report_child_jobs_separately || 1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
/* update the exit code */
ORTE_UPDATE_EXIT_STATUS(lowest);
}
/* warn user */
opal_output(orte_clean_output,
"-------------------------------------------------------\n"
"While %s job %s terminated normally, %d %s. Further examination may be required.\n"
"-------------------------------------------------------",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
i32, (1 == i32) ? "process returned\na non-zero exit code." :
"processes returned\nnon-zero exit codes.");
}
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:base:check_job_completed declared job %s terminated with state %s - checking all jobs",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid),
orte_job_state_to_str(jdata->state)));
/* if this job is a continuously operating one, then don't do
* anything further - just return here
*/
if (NULL != jdata &&
(orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL) ||
ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RECOVERABLE))) {
goto CHECK_ALIVE;
}
/* if the job that is being checked is the HNP, then we are
* trying to terminate the orteds. In that situation, we
* do -not- check all jobs - we simply notify the HNP
* that the orteds are complete. Also check special case
* if jdata is NULL - we want
* to definitely declare the job done if the orteds
* have completed, no matter what else may be happening.
* This can happen if a ctrl-c hits in the "wrong" place
* while launching
*/
CHECK_DAEMONS:
if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
if (0 == orte_routed.num_routes()) {
/* orteds are done! */
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s orteds complete - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (NULL == jdata) {
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
}
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
OBJ_RELEASE(caddy);
return;
}
OBJ_RELEASE(caddy);
return;
//......... rest of the code omitted .........
Example 10: orte_regex_extract_node_names
int orte_regex_extract_node_names(char *regexp, char ***names)
{
int i, j, len, ret;
char *base;
char *orig;
bool found_range = false;
bool more_to_come = false;
if (NULL == regexp) {
*names = NULL;
return ORTE_SUCCESS;
}
orig = base = strdup(regexp);
if (NULL == base) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s regex:extract:nodenames: checking nodelist: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
regexp));
do {
/* Find the base */
len = strlen(base);
for (i = 0; i <= len; ++i) {
if (base[i] == '[') {
/* we found a range. this gets dealt with below */
base[i] = '\0';
found_range = true;
break;
}
if (base[i] == ',') {
/* we found a singleton node, and there are more to come */
base[i] = '\0';
found_range = false;
more_to_come = true;
break;
}
if (base[i] == '\0') {
/* we found a singleton node */
found_range = false;
more_to_come = false;
break;
}
}
if(i == 0) {
/* we found a special character at the beginning of the string */
orte_show_help("help-regex.txt", "regex:special-char", true, regexp);
free(orig);
return ORTE_ERR_BAD_PARAM;
}
if (found_range) {
/* If we found a range, now find the end of the range */
for (j = i; j < len; ++j) {
if (base[j] == ']') {
base[j] = '\0';
break;
}
}
if (j >= len) {
/* we didn't find the end of the range */
orte_show_help("help-regex.txt", "regex:end-range-missing", true, regexp);
free(orig);
return ORTE_ERR_BAD_PARAM;
}
ret = regex_parse_node_ranges(base, base + i + 1, names);
if(ORTE_SUCCESS != ret) {
orte_show_help("help-regex.txt", "regex:bad-value", true, regexp);
free(orig);
return ret;
}
if(base[j + 1] == ',') {
more_to_come = true;
base = &base[j + 2];
} else {
more_to_come = false;
}
} else {
/* If we didn't find a range, just add the node */
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s regex:extract:nodenames: found node: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), base));
if(ORTE_SUCCESS != (ret = opal_argv_append_nosize(names, base))) {
ORTE_ERROR_LOG(ret);
free(orig);
return ret;
}
/* set base equal to the (possible) next base to look at */
base = &base[i + 1];
}
} while(more_to_come);
free(orig);
//......... rest of the code omitted .........
Example 11: orte_state_base_activate_job_state
void orte_state_base_activate_job_state(orte_job_t *jdata,
orte_job_state_t state)
{
opal_list_item_t *itm, *any=NULL, *error=NULL;
orte_state_t *s;
orte_state_caddy_t *caddy;
for (itm = opal_list_get_first(&orte_job_states);
itm != opal_list_get_end(&orte_job_states);
itm = opal_list_get_next(itm)) {
s = (orte_state_t*)itm;
if (s->job_state == ORTE_JOB_STATE_ANY) {
/* save this place */
any = itm;
}
if (s->job_state == ORTE_JOB_STATE_ERROR) {
error = itm;
}
if (s->job_state == state) {
OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
"%s ACTIVATING JOB %s STATE %s PRI %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
orte_job_state_to_str(state), s->priority));
if (NULL == s->cbfunc) {
OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
"%s NULL CBFUNC FOR JOB %s STATE %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == jdata) ? "ALL" : ORTE_JOBID_PRINT(jdata->jobid),
orte_job_state_to_str(state)));
return;
}
caddy = OBJ_NEW(orte_state_caddy_t);
if (NULL != jdata) {
caddy->jdata = jdata;
caddy->job_state = state;
OBJ_RETAIN(jdata);
}
opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy);
opal_event_set_priority(&caddy->ev, s->priority);
opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
return;
}
}
/* if we get here, then the state wasn't found, so execute
* the default handler if it is defined
*/
if (ORTE_JOB_STATE_ERROR < state && NULL != error) {
s = (orte_state_t*)error;
} else if (NULL != any) {
s = (orte_state_t*)any;
} else {
OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
"ACTIVATE: ANY STATE NOT FOUND"));
return;
}
if (NULL == s->cbfunc) {
OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
"ACTIVATE: ANY STATE HANDLER NOT DEFINED"));
return;
}
caddy = OBJ_NEW(orte_state_caddy_t);
if (NULL != jdata) {
caddy->jdata = jdata;
caddy->job_state = state;
OBJ_RETAIN(jdata);
}
OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
"%s ACTIVATING JOB %s STATE %s PRI %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
orte_job_state_to_str(state), s->priority));
opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy);
opal_event_set_priority(&caddy->ev, s->priority);
opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
}
Example 12: orte_rmaps_seq_map
/*
* Sequentially map the ranks according to the placement in the
* specified hostfile
*/
static int orte_rmaps_seq_map(orte_job_t *jdata)
{
orte_job_map_t *map;
orte_app_context_t *app;
int i, n;
orte_std_cntr_t j;
opal_list_item_t *item;
orte_node_t *node, *nd;
seq_node_t *sq, *save=NULL, *seq;
orte_vpid_t vpid;
orte_std_cntr_t num_nodes;
int rc;
opal_list_t default_seq_list;
opal_list_t node_list, *seq_list, sq_list;
orte_proc_t *proc;
mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
char *hosts, *hstname, *sep, *eptr;
FILE *fp;
#if OPAL_HAVE_HWLOC
opal_hwloc_resource_type_t rtype;
#endif
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base_framework.framework_output,
"%s rmaps:seq called on job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid)));
/* this mapper can only handle initial launch
* when seq mapping is desired - allow
* restarting of failed apps
*/
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: job %s is being restarted - seq cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (NULL != jdata->map->req_mapper) {
if (0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: job %s not using sequential mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
/* we need to process it */
goto process;
}
if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
/* I don't know how to do these - defer */
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: job %s not using seq mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
process:
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
if (NULL != jdata->map->last_mapper) {
free(jdata->map->last_mapper);
}
jdata->map->last_mapper = strdup(c->mca_component_name);
/* convenience def */
map = jdata->map;
/* if there is a default hostfile, go and get its ordered list of nodes */
OBJ_CONSTRUCT(&default_seq_list, opal_list_t);
if (NULL != orte_default_hostfile) {
/* open the file */
fp = fopen(orte_default_hostfile, "r");
if (NULL == fp) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto error;
}
while (NULL != (hstname = orte_getline(fp))) {
if (0 == strlen(hstname)) {
/* blank line - ignore */
continue;
}
sq = OBJ_NEW(seq_node_t);
if (NULL != (sep = strchr(hstname, ' '))) {
*sep = '\0';
sep++;
/* remove any trailing space */
eptr = sep + strlen(sep) - 1;
while (eptr > sep && isspace(*eptr)) {
eptr--;
}
*(eptr+1) = 0;
sq->cpuset = strdup(sep);
//......... rest of the code omitted .........
Example 13: orte_util_get_ordered_host_list
int orte_util_get_ordered_host_list(opal_list_t *nodes,
char *hostfile)
{
opal_list_t exclude;
opal_list_item_t *item, *itm, *item2, *item1;
char *cptr;
int num_empty, i, nodeidx, startempty=0;
bool want_all_empty=false;
orte_node_t *node_from_pool, *newnode;
int rc;
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s hostfile: creating ordered list of hosts from hostfile %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
OBJ_CONSTRUCT(&exclude, opal_list_t);
/* parse the hostfile and add the contents to the list, keeping duplicates */
if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
goto cleanup;
}
/* parse the nodes to process any relative node directives */
item2 = opal_list_get_first(nodes);
while (item2 != opal_list_get_end(nodes)) {
orte_node_t *node=(orte_node_t*)item2;
/* save the next location in case this one gets removed */
item1 = opal_list_get_next(item2);
if ('+' != node->name[0]) {
item2 = item1;
continue;
}
/* see if we specified empty nodes */
if ('e' == node->name[1] ||
'E' == node->name[1]) {
/* request for empty nodes - do they want
* all of them?
*/
if (NULL != (cptr = strchr(node->name, ':'))) {
/* the colon indicates a specific # are requested */
cptr++; /* step past : */
num_empty = strtol(cptr, NULL, 10);
} else {
/* want them all - set num_empty to max */
num_empty = INT_MAX;
want_all_empty = true;
}
/* insert empty nodes into newnodes list in place of the current item.
* since item1 is the next item, we insert in front of it
*/
if (!orte_hnp_is_allocated && 0 == startempty) {
startempty = 1;
}
for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (0 == node_from_pool->slots_inuse) {
newnode = OBJ_NEW(orte_node_t);
newnode->name = strdup(node_from_pool->name);
/* if the slot count here is less than the
* total slots avail on this node, set it
* to the specified count - this allows people
* to subdivide an allocation
*/
if (node->slots < node_from_pool->slots) {
newnode->slots_alloc = node->slots;
} else {
newnode->slots_alloc = node_from_pool->slots;
}
opal_list_insert_pos(nodes, item1, &newnode->super);
/* track number added */
--num_empty;
}
}
/* bookmark where we stopped in case they ask for more */
startempty = i;
/* did they get everything they wanted? */
if (!want_all_empty && 0 < num_empty) {
orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
true, num_empty);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
/* since we have expanded the provided node, remove
* it from list
*/
opal_list_remove_item(nodes, item2);
OBJ_RELEASE(item2);
} else if ('n' == node->name[1] ||
'N' == node->name[1]) {
/* they want a specific relative node #, so
* look it up on global pool
*/
nodeidx = strtol(&node->name[2], NULL, 10);
/* if the HNP is not allocated, then we need to
* adjust the index as the node pool is offset
//......... rest of the code omitted .........
Example 14: orte_util_filter_hostfile_nodes
/* Parse the provided hostfile and filter the nodes that are
* on the input list, removing those that
* are not found in the hostfile
*/
int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
char *hostfile)
{
opal_list_t newnodes, exclude;
opal_list_item_t *item1, *item2, *next, *item3;
orte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3;
int rc = ORTE_SUCCESS;
char *cptr;
int num_empty, nodeidx;
bool want_all_empty = false;
opal_list_t keep;
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s hostfile: filtering nodes through hostfile %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
/* parse the hostfile and create local list of findings */
OBJ_CONSTRUCT(&newnodes, opal_list_t);
OBJ_CONSTRUCT(&exclude, opal_list_t);
if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) {
OBJ_DESTRUCT(&newnodes);
return rc;
}
/* remove from the list of newnodes those that are in the exclude list
* since we could have added duplicate names above due to the */
while (NULL != (item1 = opal_list_remove_first(&exclude))) {
node_from_file = (orte_node_t*)item1;
/* check for matches on nodes */
for (item2 = opal_list_get_first(&newnodes);
item2 != opal_list_get_end(&newnodes);
item2 = opal_list_get_next(item2)) {
orte_node_t *node = (orte_node_t*)item2;
if (0 == strcmp(node_from_file->name, node->name)) {
/* match - remove it */
opal_list_remove_item(&newnodes, item2);
OBJ_RELEASE(item2);
break;
}
}
OBJ_RELEASE(item1);
}
/* now check our nodes and keep those that match. We can
* destruct our hostfile list as we go since this won't be needed
*/
OBJ_CONSTRUCT(&keep, opal_list_t);
while (NULL != (item2 = opal_list_remove_first(&newnodes))) {
node_from_file = (orte_node_t*)item2;
next = opal_list_get_next(item2);
/* see if this is a relative node syntax */
if ('+' == node_from_file->name[0]) {
/* see if we specified empty nodes */
if ('e' == node_from_file->name[1] ||
'E' == node_from_file->name[1]) {
/* request for empty nodes - do they want
* all of them?
*/
if (NULL != (cptr = strchr(node_from_file->name, ':'))) {
/* the colon indicates a specific # are requested */
cptr++; /* step past : */
num_empty = strtol(cptr, NULL, 10);
} else {
/* want them all - set num_empty to max */
num_empty = INT_MAX;
want_all_empty = true;
}
/* search the list of nodes provided to us and find those
* that are empty
*/
item1 = opal_list_get_first(nodes);
while (0 < num_empty && item1 != opal_list_get_end(nodes)) {
node_from_list = (orte_node_t*)item1;
next = opal_list_get_next(item1); /* keep our place */
if (0 == node_from_list->slots_inuse) {
/* check to see if this node is explicitly called
* out later - if so, don't use it here
*/
for (item3 = opal_list_get_first(&newnodes);
item3 != opal_list_get_end(&newnodes);
item3 = opal_list_get_next(item3)) {
node3 = (orte_node_t*)item3;
if (0 == strcmp(node3->name, node_from_list->name)) {
/* match - don't use it */
goto skipnode;
}
}
/* remove item from list */
opal_list_remove_item(nodes, item1);
/* xfer to keep list */
opal_list_append(&keep, item1);
--num_empty;
}
skipnode:
//......... rest of the code omitted .........
Example 15: orte_grpcomm_base_progress_collectives
void orte_grpcomm_base_progress_collectives(void)
{
opal_list_item_t *item;
orte_grpcomm_collective_t *coll;
orte_namelist_t *nm;
orte_job_t *jdata;
opal_buffer_t *relay;
int rc;
/* cycle thru all known collectives - any collective on the list
* must have come from either a local proc or receiving a global
* collective. Either way, the number of required recipients
* is the number of local procs for that job
*/
item = opal_list_get_first(&orte_grpcomm_base.active_colls);
while (item != opal_list_get_end(&orte_grpcomm_base.active_colls)) {
coll = (orte_grpcomm_collective_t*)item;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s PROGRESSING COLL id %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
coll->id));
/* if this collective is already locally complete, then ignore it */
if (coll->locally_complete) {
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s COLL %d IS LOCALLY COMPLETE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
coll->id));
goto next_coll;
}
/* get the jobid of the participants in this collective */
if (NULL == (nm = (orte_namelist_t*)opal_list_get_first(&coll->participants))) {
opal_output(0, "NO PARTICIPANTS");
goto next_coll;
}
/* get the job object for this participant */
if (NULL == (jdata = orte_get_job_data_object(nm->name.jobid))) {
/* if the job object isn't found, then we can't progress
* this collective
*/
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s COLL %d JOBID %s NOT FOUND",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
coll->id, ORTE_JOBID_PRINT(nm->name.jobid)));
goto next_coll;
}
/* all local procs from this job are required to participate */
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s ALL LOCAL PROCS FOR JOB %s CONTRIBUTE %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid),
(int)jdata->num_local_procs));
/* see if all reqd participants are done */
if (jdata->num_local_procs == coll->num_local_recvd) {
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s COLLECTIVE %d LOCALLY COMPLETE - SENDING TO GLOBAL COLLECTIVE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), coll->id));
/* mark it as locally complete */
coll->locally_complete = true;
/* pack the collective */
relay = OBJ_NEW(opal_buffer_t);
orte_grpcomm_base_pack_collective(relay, jdata->jobid,
coll, ORTE_GRPCOMM_INTERNAL_STG_LOCAL);
/* send it to our global collective handler */
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay,
ORTE_RML_TAG_DAEMON_COLL, 0,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(relay);
}
}
next_coll:
item = opal_list_get_next(item);
}
}