本文整理汇总了C++中ORTE_JOB_FAMILY函数的典型用法代码示例。如果您正苦于以下问题:C++ ORTE_JOB_FAMILY函数的具体用法?C++ ORTE_JOB_FAMILY怎么用?C++ ORTE_JOB_FAMILY使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了ORTE_JOB_FAMILY函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: route_is_defined
/*
 * Report whether this process has a usable route to the given target.
 *
 * target: name of the process to be reached.
 *
 * Returns true when a route exists, false otherwise.
 */
static bool route_is_defined(const orte_process_name_t *target)
{
    int idx;
    orte_routed_jobfam_t *entry;
    uint16_t wanted;

    /* target lives in our own job family: a route exists only if
     * some daemon is known to host that proc
     */
    if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        if (ORTE_VPID_INVALID == orte_get_proc_daemon_vpid((orte_process_name_t*)target)) {
            return false;
        }
        return true;
    }

    /* foreign job family and we are not the HNP: the message is
     * relayed via the HNP, so the answer is always yes
     */
    if (!ORTE_PROC_IS_HNP) {
        return true;
    }

    /* we are the HNP: scan the table of known job families */
    wanted = ORTE_JOB_FAMILY(target->jobid);
    for (idx = 0; idx < orte_routed_jobfams.size; idx++) {
        entry = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, idx);
        if (NULL == entry) {
            /* empty slot */
            continue;
        }
        if (entry->job_family != wanted) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_radix: route to %s is defined",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOB_FAMILY_PRINT(target->jobid)));
        return true;
    }

    /* no entry for that job family */
    return false;
}
示例2: orte_routed_base_update_hnps
/*
 * Unpack every HNP URI string contained in buf and record it in the
 * orte_routed_jobfams table.
 *
 * For each URI, the process name is parsed out of it and its job family
 * computed. If the table already holds an entry for that job family, the
 * stored URI is replaced; otherwise a new entry is created AND inserted
 * into the table.
 *
 * buf: buffer holding zero or more packed OPAL_STRING URIs. Unpacking
 *      stops at the first unpack call that does not return ORTE_SUCCESS.
 */
void orte_routed_base_update_hnps(opal_buffer_t *buf)
{
    int n, i, rc;
    char *uri;
    orte_process_name_t name;
    orte_routed_jobfam_t *jfam, *existing;
    uint16_t jobfamily;

    n = 1;
    while (ORTE_SUCCESS == opal_dss.unpack(buf, &uri, &n, OPAL_STRING)) {
        /* extract the name */
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(uri, &name, NULL))) {
            ORTE_ERROR_LOG(rc);
            free(uri);
            n = 1;
            continue;
        }
        jobfamily = ORTE_JOB_FAMILY(name.jobid);

        /* see if we already have this connection - use a dedicated
         * index so the unpack count in n is not clobbered
         */
        existing = NULL;
        for (i = 0; i < orte_routed_jobfams.size; i++) {
            jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i);
            if (NULL != jfam && jobfamily == jfam->job_family) {
                existing = jfam;
                break;
            }
        }

        if (NULL != existing) {
            /* update uri - free(NULL) is a no-op, so no guard is needed */
            free(existing->hnp_uri);
            existing->hnp_uri = strdup(uri);
            OPAL_OUTPUT_VERBOSE((10, orte_routed_base_framework.framework_output,
                                 "%s adding remote HNP %s\n\t%s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&name), uri));
        } else {
            /* nope - create it */
            jfam = OBJ_NEW(orte_routed_jobfam_t);
            if (NULL == jfam) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                free(uri);
                return;
            }
            jfam->job_family = jobfamily;
            jfam->route.jobid = name.jobid;
            jfam->route.vpid = name.vpid;
            jfam->hnp_uri = strdup(uri);
            /* store the new entry; without this the record is leaked and
             * the route can never be looked up later
             */
            if (0 > opal_pointer_array_add(&orte_routed_jobfams, jfam)) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                /* NOTE(review): presumably the class destructor frees
                 * hnp_uri - confirm against orte_routed_jobfam_t
                 */
                OBJ_RELEASE(jfam);
            }
        }

        free(uri);
        n = 1;
    }
}
示例3: _setup_jobfam_session_dir
/*
 * Make sure orte_process_info.jobfam_session_dir is defined.
 *
 * If the directory name is already set, nothing is done. Otherwise the
 * top session directory is set up first and the job-family directory
 * name is derived from it:
 *   - HNP:                         <top>/pid.<pid>
 *   - no proc / invalid jobid:     <top>/jobfam
 *   - otherwise:                   <top>/jf.<job family of proc>
 *
 * proc: process whose job family names the directory; may be NULL.
 *
 * Returns ORTE_SUCCESS, the error returned by _setup_top_session_dir(),
 * or ORTE_ERR_OUT_OF_RESOURCE when the name cannot be allocated. On an
 * allocation failure the directory pointer is reset to NULL on EVERY
 * branch, so a later call retries instead of trusting a pointer whose
 * value asprintf leaves undefined.
 */
static int _setup_jobfam_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;
    int len;

    /* already defined - nothing to construct */
    if (NULL != orte_process_info.jobfam_session_dir) {
        return rc;
    }

    /* construct the top_session_dir if we need */
    if (ORTE_SUCCESS != (rc = _setup_top_session_dir())) {
        return rc;
    }

    if (ORTE_PROC_IS_HNP) {
        len = asprintf(&orte_process_info.jobfam_session_dir,
                       "%s/pid.%lu", orte_process_info.top_session_dir,
                       (unsigned long)orte_process_info.pid);
    } else if (NULL == proc || (ORTE_JOBID_INVALID == proc->jobid)) {
        /* we were not given one, so define it */
        len = asprintf(&orte_process_info.jobfam_session_dir,
                       "%s/jobfam", orte_process_info.top_session_dir);
    } else {
        len = asprintf(&orte_process_info.jobfam_session_dir,
                       "%s/jf.%d", orte_process_info.top_session_dir,
                       ORTE_JOB_FAMILY(proc->jobid));
    }

    if (0 > len) {
        /* the pointer contents are undefined after a failed asprintf */
        orte_process_info.jobfam_session_dir = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
示例4: orte_routed_base_open
/*
 * Framework open hook for the routed base.
 *
 * Resets the wait-sync flag, constructs the table of known job
 * families, seeds it with an entry describing our own HNP, and then
 * opens all available components.
 *
 * flags: open flags handed through to the component open call.
 *
 * Returns whatever mca_base_framework_components_open() returns.
 */
static int orte_routed_base_open(mca_base_open_flag_t flags)
{
    orte_routed_jobfam_t *own_entry;

    orte_routed_base_wait_sync = false;

    /* storage for the uris of remote HNPs */
    OBJ_CONSTRUCT(&orte_routed_jobfams, opal_pointer_array_t);
    opal_pointer_array_init(&orte_routed_jobfams, 8, INT_MAX, 8);

    /* first entry: our own job family, routed to our HNP */
    own_entry = OBJ_NEW(orte_routed_jobfam_t);
    own_entry->job_family = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
    own_entry->route.vpid = ORTE_PROC_MY_HNP->vpid;
    own_entry->route.jobid = ORTE_PROC_MY_HNP->jobid;
    if (NULL != orte_process_info.my_hnp_uri) {
        /* the uri is only copied when one is known */
        own_entry->hnp_uri = strdup(orte_process_info.my_hnp_uri);
    }
    opal_pointer_array_add(&orte_routed_jobfams, own_entry);

    /* hand over to the components */
    return mca_base_framework_components_open(&orte_routed_base_framework, flags);
}
示例5: orte_build_job_session_dir
/*
 * Build the path of a job session directory below top_dir.
 *
 * top_dir: leading path component.
 * proc:    process whose job family forms the second component.
 * jobid:   when not ORTE_JOBID_WILDCARD, its local jobid is appended
 *          as a third component.
 *
 * Returns the string produced by opal_os_path(), or NULL on failure
 * (the failure is logged). The caller owns the returned string.
 */
static char *orte_build_job_session_dir(char *top_dir,
                                        orte_process_name_t *proc,
                                        orte_jobid_t jobid)
{
    char *family_str = NULL;
    char *job_str = NULL;
    char *path = NULL;

    if (asprintf(&family_str, "%d", ORTE_JOB_FAMILY(proc->jobid)) < 0) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return NULL;
    }

    if (ORTE_JOBID_WILDCARD == jobid) {
        /* wildcard: stop at the job-family level */
        path = opal_os_path(false, top_dir, family_str, NULL);
    } else if (asprintf(&job_str, "%d", ORTE_LOCAL_JOBID(jobid)) < 0) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        free(family_str);
        return NULL;
    } else {
        path = opal_os_path(false, top_dir, family_str, job_str, NULL);
        free(job_str);
    }

    if (NULL == path) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    }

    free(family_str);
    return path;
}
示例6: rte_init
//.........这里部分代码省略.........
/* we are a singleton, so there is only one proc in the job */
orte_process_info.num_procs = 1;
/* push into the environ for pickup in MPI layer for
* MPI-3 required info key
*/
if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs);
putenv(ev1);
added_num_procs = true;
}
if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs);
putenv(ev2);
added_app_ctx = true;
}
/* get our app number from PMI - ok if not found */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_APPNUM,
ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
orte_process_info.app_num = u32;
} else {
orte_process_info.app_num = 0;
}
/* set some other standard values */
orte_process_info.num_local_peers = 0;
/* setup transport keys in case the MPI layer needs them -
* we can use the jobfam and stepid as unique keys
* because they are unique values assigned by the RM
*/
if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid);
if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key);
putenv(envar);
added_transport_keys = true;
/* cannot free the envar as that messes up our environ */
free(string_key);
}
/* retrieve our topology */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO,
&name, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* load the topology */
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
ret = OPAL_ERROR;
free(val);
error = "setting topology";
goto error;
}
if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) {
ret = OPAL_ERROR;
free(val);
hwloc_topology_destroy(opal_hwloc_topology);
error = "setting topology";
goto error;
}
/* since we are loading this from an external source, we have to
* explicitly set a flag so hwloc sets things up correctly
示例7: get_route
/*
 * Work out the next hop for a message addressed to target.
 *
 * target: final destination of the message.
 *
 * Returns, by value, the name of the process to send to. This is a copy
 * of ORTE_NAME_INVALID when the target name is invalid or no daemon is
 * known to host it. The chosen hop is always reported through the
 * verbose output before returning.
 */
static orte_process_name_t get_route(orte_process_name_t *target)
{
    orte_process_name_t relay;
    orte_process_name_t *hop;

    if (ORTE_JOBID_INVALID == target->jobid ||
        ORTE_VPID_INVALID == target->vpid) {
        hop = ORTE_NAME_INVALID;
    } else {
        /* start out pointing at my own daemon */
        relay.jobid = ORTE_PROC_MY_DAEMON->jobid;
        relay.vpid = ORTE_PROC_MY_DAEMON->vpid;

        if (ORTE_PROC_IS_APP) {
            if (NULL != orte_process_info.my_daemon_uri) {
                /* a daemon launched me, so everything goes through it */
                hop = ORTE_PROC_MY_DAEMON;
            } else {
                /* direct launched, no daemon available: go direct */
                hop = target;
            }
        } else if (ORTE_PROC_IS_TOOL) {
            /* tools go direct within their own job family and to the
             * target's HNP otherwise
             */
            if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
                hop = target;
            } else {
                ORTE_HNP_NAME_FROM_JOB(&relay, target->jobid);
                hop = &relay;
            }
        } else if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
            /* from here on only the HNP and daemons remain */
            OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                 "%s routing direct to the HNP",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            hop = ORTE_PROC_MY_HNP;
        } else {
            /* look up the daemon that hosts the target */
            relay.jobid = ORTE_PROC_MY_NAME->jobid;
            relay.vpid = orte_get_proc_daemon_vpid(target);
            if (ORTE_VPID_INVALID == relay.vpid) {
                hop = ORTE_NAME_INVALID;
            } else if (ORTE_PROC_MY_NAME->vpid == relay.vpid) {
                /* I am that daemon, so deliver straight to the target */
                hop = target;
            } else {
                /* otherwise send directly to the hosting daemon */
                hop = &relay;
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s routed_direct_get(%s) --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(hop)));
    return *hop;
}
示例8: orte_iof_hnp_send_data_to_endpoint
/*
 * Pack an IOF message (tag, process name, optional payload) and deliver
 * it to host.
 *
 * host:     recipient - a daemon, or a tool that requested IOF. When its
 *           jobid equals ours and its vpid is ORTE_VPID_WILDCARD, the
 *           message is xcast instead of sent point-to-point.
 * target:   name packed into the message - either the intended
 *           recipient (stdin to a daemon) or the source (anyone else).
 * tag:      IOF tag, packed first so that flow control messages can
 *           consist solely of the tag.
 * data:     payload bytes; NULL means no payload is packed.
 * numbytes: number of payload bytes to pack (zero is allowed).
 *
 * Returns ORTE_SUCCESS, or the error code of the failing pack/send call.
 * The request is silently dropped (ORTE_SUCCESS) when host is in our job
 * family and job termination has been ordered. The message buffer is
 * released on every error path, including a failed send.
 */
int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host,
                                       orte_process_name_t *target,
                                       orte_iof_tag_t tag,
                                       unsigned char *data, int numbytes)
{
    opal_buffer_t *buf;
    int rc;

    /* if the host is a daemon and we are in the process of aborting,
     * then ignore this request. We leave it alone if the host is not
     * a daemon because it might be a tool that wants to watch the
     * output from an abort procedure
     */
    if (ORTE_JOB_FAMILY(host->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)
        && orte_job_term_ordered) {
        return ORTE_SUCCESS;
    }

    buf = OBJ_NEW(opal_buffer_t);
    if (NULL == buf) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* pack the tag first */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
        goto pack_failed;
    }

    /* pack the name of the target */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, target, 1, ORTE_NAME))) {
        goto pack_failed;
    }

    /* pack the data, if any - if numbytes is zero, we will pack zero bytes */
    if (NULL != data) {
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, data, numbytes, OPAL_BYTE))) {
            goto pack_failed;
        }
    }

    /* if the target is wildcard, then this needs to go to everyone - xcast it */
    if (ORTE_PROC_MY_NAME->jobid == host->jobid &&
        ORTE_VPID_WILDCARD == host->vpid) {
        /* the local daemons will know how to handle it */
        orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, buf, ORTE_RML_TAG_IOF_PROXY);
        OBJ_RELEASE(buf);
        return ORTE_SUCCESS;
    }

    /* send the buffer to the host - this is either a daemon or
     * a tool that requested IOF
     */
    if (0 > (rc = orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY,
                                          0, send_cb, NULL))) {
        ORTE_ERROR_LOG(rc);
        /* NOTE(review): assumes send_cb is what normally releases the
         * buffer and that it is not invoked when the call itself fails,
         * so the buffer must be released here to avoid a leak - confirm
         */
        OBJ_RELEASE(buf);
        return rc;
    }
    return ORTE_SUCCESS;

pack_failed:
    ORTE_ERROR_LOG(rc);
    OBJ_RELEASE(buf);
    return rc;
}
示例9: orte_session_dir_get_name
/*
* Construct the fullpath to the session directory
*/
int
orte_session_dir_get_name(char **fulldirpath,
char **return_prefix, /* This will come back as the valid tmp dir */
char **return_frontend,
char *hostid,
char *batchid,
orte_process_name_t *proc) {
char *hostname = NULL,
*batchname = NULL,
*sessions = NULL,
*user = NULL,
*prefix = NULL,
*frontend = NULL,
*jobfam = NULL,
*job = NULL,
*vpidstr = NULL;
bool prefix_provided = false;
int exit_status = ORTE_SUCCESS;
size_t len;
int uid;
struct passwd *pwdent;
/* Ensure that system info is set */
orte_proc_info();
/* get the name of the user */
uid = getuid();
#ifdef HAVE_GETPWUID
pwdent = getpwuid(uid);
#else
pwdent = NULL;
#endif
if (NULL != pwdent) {
user = strdup(pwdent->pw_name);
} else {
orte_show_help("help-orte-runtime.txt",
"orte:session:dir:nopwname", true);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/*
* set the 'hostname'
*/
if( NULL != hostid) { /* User specified version */
hostname = strdup(hostid);
}
else { /* check if it is set elsewhere */
if( NULL != orte_process_info.nodename)
hostname = strdup(orte_process_info.nodename);
else {
/* Couldn't find it, so fail */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
exit_status = ORTE_ERR_BAD_PARAM;
goto cleanup;
}
}
/*
* set the 'batchid'
*/
if (NULL != batchid)
batchname = strdup(batchid);
else
batchname = strdup("0");
/*
* get the front part of the session directory
* Will look something like:
* openmpi-sessions-USER@HOSTNAME_BATCHID
*/
if (NULL != orte_process_info.top_session_dir) {
frontend = strdup(orte_process_info.top_session_dir);
}
else { /* If not set then construct it */
if (0 > asprintf(&frontend, "openmpi-sessions-%[email protected]%s_%s", user, hostname, batchname)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
/*
* Construct the session directory
*/
/* If we were given a valid vpid then we can construct it fully into:
* openmpi-sessions-USER@HOSTNAME_BATCHID/JOB-FAMILY/JOBID/VPID
*/
if( NULL != proc) {
if (ORTE_VPID_INVALID != proc->vpid) {
if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
//.........这里部分代码省略.........
示例10: route_lost
/*
 * React to the loss of the connection to route.
 *
 * route: name of the process whose connection went away.
 *
 * Returns ORTE_ERR_FATAL when the lost route is our lifeline and we are
 * not finalizing; ORTE_SUCCESS in every other case.
 */
static int route_lost(const orte_process_name_t *route)
{
    opal_list_item_t *cur;
    orte_routed_jobfam_t *entry;
    uint16_t lost_family;
    int idx;

    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s route to %s lost",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(route)));

    /* an HNP that loses a route into a foreign job family drops that
     * family's entry from its table
     */
    if ((ORTE_JOB_FAMILY(route->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
        ORTE_PROC_IS_HNP) {
        lost_family = ORTE_JOB_FAMILY(route->jobid);
        for (idx = 0; idx < orte_routed_jobfams.size; idx++) {
            entry = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, idx);
            if (NULL == entry || entry->job_family != lost_family) {
                continue;
            }
            OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                 "%s routed_radix: route to %s lost",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOB_FAMILY_PRINT(route->jobid)));
            opal_pointer_array_set_item(&orte_routed_jobfams, idx, NULL);
            OBJ_RELEASE(entry);
            break;
        }
    }

    /* losing the lifeline outside of finalize is fatal. We only report
     * it here and let the OOB do the abort: it must first release a
     * thread-lock, otherwise we would hang
     */
    if (!orte_finalizing &&
        NULL != lifeline &&
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed:radix: Connection to lifeline %s lost",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(lifeline)));
        return ORTE_ERR_FATAL;
    }

    /* an HNP or daemon that loses a daemon of its own job removes it
     * from the list of children, if it is one of them
     */
    if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) &&
        route->jobid == ORTE_PROC_MY_NAME->jobid) {
        cur = opal_list_get_first(&my_children);
        while (cur != opal_list_get_end(&my_children)) {
            if (((orte_routed_tree_t*)cur)->vpid == route->vpid) {
                opal_list_remove_item(&my_children, cur);
                OBJ_RELEASE(cur);
                return ORTE_SUCCESS;
            }
            cur = opal_list_get_next(cur);
        }
    }

    /* nothing we track - treat as success */
    return ORTE_SUCCESS;
}
示例11: get_route
static orte_process_name_t get_route(orte_process_name_t *target)
{
orte_process_name_t *ret, daemon;
opal_list_item_t *item;
orte_routed_tree_t *child;
int i;
orte_routed_jobfam_t *jfam;
uint16_t jfamily;
if (!orte_routing_is_enabled) {
ret = target;
goto found;
}
/* initialize */
daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
daemon.vpid = ORTE_PROC_MY_DAEMON->vpid;
if (target->jobid == ORTE_JOBID_INVALID ||
target->vpid == ORTE_VPID_INVALID) {
ret = ORTE_NAME_INVALID;
goto found;
}
/* if it is me, then the route is just direct */
if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
ret = target;
goto found;
}
/* if I am an application process, always route via my local daemon */
if (ORTE_PROC_IS_APP) {
ret = ORTE_PROC_MY_DAEMON;
goto found;
}
/* if I am a tool, the route is direct if target is in
* my own job family, and to the target's HNP if not
*/
if (ORTE_PROC_IS_TOOL) {
if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
ret = target;
goto found;
} else {
ORTE_HNP_NAME_FROM_JOB(&daemon, target->jobid);
ret = &daemon;
goto found;
}
}
/****** HNP AND DAEMONS ONLY ******/
/* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
/* if I am a daemon, route this via the HNP */
if (ORTE_PROC_IS_DAEMON) {
ret = ORTE_PROC_MY_HNP;
goto found;
}
/* if I am the HNP or a tool, then I stored a route to
* this job family, so look it up
*/
jfamily = ORTE_JOB_FAMILY(target->jobid);
for (i=0; i < orte_routed_jobfams.size; i++) {
if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
continue;
}
if (jfam->job_family == jfamily) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
"%s routed_binomial: route to %s found",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOB_FAMILY_PRINT(target->jobid)));
ret = &jfam->route;
goto found;
}
}
/* not found - so we have no route */
ret = ORTE_NAME_INVALID;
goto found;
}
/* THIS CAME FROM OUR OWN JOB FAMILY... */
/* if this is going to the HNP, then send it direct if we don't know
* how to get there - otherwise, send it via the tree
*/
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
if (!hnp_direct || orte_static_ports) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
"%s routing to the HNP through my parent %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));
ret = ORTE_PROC_MY_PARENT;
goto found;
} else {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
"%s routing direct to the HNP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
ret = ORTE_PROC_MY_HNP;
//.........这里部分代码省略.........
示例12: delete_route
/*
 * Remove the stored route to the job family of proc, if there is one.
 *
 * proc: process whose route is to be deleted.
 *
 * Returns ORTE_ERR_BAD_PARAM when proc carries an invalid jobid or
 * vpid, ORTE_SUCCESS otherwise (including when nothing was stored).
 */
static int delete_route(orte_process_name_t *proc)
{
    int slot;
    orte_routed_jobfam_t *entry;
    uint16_t family;

    if (ORTE_JOBID_INVALID == proc->jobid ||
        ORTE_VPID_INVALID == proc->vpid) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* only HNPs, daemons and tools hold routes; anyone else has
     * nothing to delete
     */
    if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
        !ORTE_PROC_IS_TOOL) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix_delete_route for %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc)));

    /* our own job family: nothing to do, the routes are redefined
     * when the routing tree is updated
     */
    if (ORTE_JOB_FAMILY(proc->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        return ORTE_SUCCESS;
    }

    /* a daemon sends everything for a foreign job family via its HNP
     * and therefore keeps no table entry for it
     */
    if (ORTE_PROC_IS_DAEMON) {
        return ORTE_SUCCESS;
    }

    /* search the table for this job family and drop the entry */
    family = ORTE_JOB_FAMILY(proc->jobid);
    for (slot = 0; slot < orte_routed_jobfams.size; slot++) {
        entry = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, slot);
        if (NULL == entry || entry->job_family != family) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed_binomial: deleting route to %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOB_FAMILY_PRINT(proc->jobid)));
        opal_pointer_array_set_item(&orte_routed_jobfams, slot, NULL);
        OBJ_RELEASE(entry);
        return ORTE_SUCCESS;
    }

    /* family not in the table - nothing to do */
    return ORTE_SUCCESS;
}
示例13: update_route
/*
 * Record that messages for target's job family are to be sent via route.
 *
 * target: process whose job family the route applies to.
 * route:  next hop to store for that job family.
 *
 * Returns:
 *   ORTE_ERR_BAD_PARAM        target has an invalid jobid or vpid
 *   ORTE_SUCCESS              nothing to do (application process, job
 *                             family zero, or a daemon asked about a
 *                             foreign job family), or the route was stored
 *   ORTE_ERR_OUT_OF_RESOURCE  the copy of the route could not be allocated
 *   ORTE_ERR_NOT_SUPPORTED    target is in our own job family
 *   otherwise the error code of the hash table insert
 */
static int update_route(orte_process_name_t *target,
                        orte_process_name_t *route)
{
    int rc;
    orte_process_name_t *route_copy;

    if (target->jobid == ORTE_JOBID_INVALID ||
        target->vpid == ORTE_VPID_INVALID) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* if I am an application process, we don't update the route since
     * we automatically route everything through the local daemon
     */
    if (ORTE_PROC_IS_APP) {
        return ORTE_SUCCESS;
    }

    /* if the job family is zero, then this is going to a local slave,
     * so the path is direct and there is nothing to do here
     */
    if (0 == ORTE_JOB_FAMILY(target->jobid)) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_linear_update: %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));

    /* THIS CAME FROM OUR OWN JOB FAMILY... */
    if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        opal_output(0, "%s CALL TO UPDATE ROUTE FOR OWN JOB FAMILY", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERR_NOT_SUPPORTED;
    }

    /* different job family: track how to send messages to it.
     * If I am a daemon, then I will automatically route anything to
     * this job family via my HNP - so nothing to do here
     */
    if (ORTE_PROC_IS_DAEMON) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_linear_update: diff job family routing job %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(target->jobid),
                         ORTE_NAME_PRINT(route)));

    /* see if an entry for this job family is already present */
    rc = opal_hash_table_get_value_uint32(&jobfam_list,
                                          ORTE_JOB_FAMILY(target->jobid),
                                          (void**)&route_copy);
    if (ORTE_SUCCESS == rc && NULL != route_copy) {
        /* already present - update the route info in case it has changed */
        *route_copy = *route;
        rc = opal_hash_table_set_value_uint32(&jobfam_list,
                                              ORTE_JOB_FAMILY(target->jobid), route_copy);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    /* not there, so add the route FOR THE JOB FAMILY */
    route_copy = malloc(sizeof *route_copy);
    if (NULL == route_copy) {
        /* do not write through a NULL pointer when out of memory */
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    *route_copy = *route;
    rc = opal_hash_table_set_value_uint32(&jobfam_list,
                                          ORTE_JOB_FAMILY(target->jobid), route_copy);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        /* the insert failed, so nothing else refers to the copy;
         * free it here rather than leak it
         */
        free(route_copy);
    }
    return rc;
}
示例14: orte_rmcast_base_process_msg
void orte_rmcast_base_process_msg(orte_rmcast_msg_t *msg)
{
orte_rmcast_channel_t channel;
rmcast_base_recv_t *ptr, *recv=NULL;
orte_process_name_t name;
orte_rmcast_tag_t tag;
int8_t flag;
struct iovec *iovec_array=NULL;
int32_t iovec_count=0, i, n, isz;
int rc=ORTE_SUCCESS;
orte_rmcast_seq_t recvd_seq_num;
opal_list_item_t *item;
rmcast_seq_tracker_t *trkr, *tptr;
rmcast_recv_log_t *log, *logptr;
bool restart;
opal_buffer_t alert;
/* extract the header */
if (ORTE_SUCCESS != (rc = extract_hdr(msg->buf, &name, &channel, &tag, &restart, &recvd_seq_num))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if this message is from myself, ignore it */
if (name.jobid == ORTE_PROC_MY_NAME->jobid && name.vpid == ORTE_PROC_MY_NAME->vpid) {
OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output,
"%s rmcast:base:process_recv sent from myself: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
goto cleanup;
}
/* if this is a heartbeat and I am not a daemon, then ignore it
* to avoid swamping tools
*/
if (!ORTE_PROC_IS_DAEMON && ORTE_RMCAST_TAG_HEARTBEAT == tag) {
OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output,
"%s rmcast:base:process_recv ignoring heartbeat",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
goto cleanup;
}
/* if this message is from a different job family, ignore it unless
* it is on the system channel. We ignore these messages to avoid
* confusion between different jobs since we all may be sharing
* multicast channels. The system channel is left open to support
* cross-job communications for detecting multiple conflicting DVMs.
*/
if (ORTE_JOB_FAMILY(name.jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) &&
(ORTE_RMCAST_SYS_CHANNEL != channel)) {
/* if we are not the HNP or a daemon, then we ignore this */
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output,
"%s rmcast:base:process_recv from a different job family: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
} else {
goto cleanup;
}
}
if (orte_rmcast_base.unreliable_xport) {
/* if the message is not on a system-specified channel, then check to see if we
* are missing any messages and need a resend
*/
if (ORTE_RMCAST_DYNAMIC_CHANNELS <= channel) {
log = NULL;
for (item = opal_list_get_first(&orte_rmcast_base.msg_logs);
item != opal_list_get_end(&orte_rmcast_base.msg_logs);
item = opal_list_get_next(item)) {
logptr = (rmcast_recv_log_t*)item;
/* look for this source */
if (name.jobid == logptr->name.jobid &&
name.vpid == logptr->name.vpid) {
log = logptr;
break;
}
}
if (NULL == log) {
/* new source */
log = OBJ_NEW(rmcast_recv_log_t);
log->name.jobid = name.jobid;
log->name.vpid = name.vpid;
opal_list_append(&orte_rmcast_base.msg_logs, &log->super);
}
/* look for the channel */
trkr = NULL;
for (item = opal_list_get_first(&log->last_msg);
item != opal_list_get_end(&log->last_msg);
item = opal_list_get_next(item)) {
tptr = (rmcast_seq_tracker_t*)item;
if (channel == tptr->channel) {
trkr = tptr;
break;
}
}
if (NULL == trkr) {
/* new channel */
trkr = OBJ_NEW(rmcast_seq_tracker_t);
trkr->channel = channel;
//.........这里部分代码省略.........
示例15: mca_oob_tcp_recv_handler
//.........这里部分代码省略.........
OPAL_TIMING_EVENT((&tm,"Msg received from %s",
ORTE_NAME_PRINT(&peer->name)));
/* we recvd all of the message */
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
"%s RECVD COMPLETE MESSAGE FROM %s (ORIGIN %s) OF %d BYTES FOR DEST %s TAG %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer->name),
ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
(int)peer->recv_msg->hdr.nbytes,
ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
peer->recv_msg->hdr.tag);
/* am I the intended recipient (header was already converted back to host order)? */
if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
/* yes - post it to the RML for delivery */
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
"%s DELIVERING TO RML",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
peer->recv_msg->data,
peer->recv_msg->hdr.nbytes);
OBJ_RELEASE(peer->recv_msg);
} else {
/* no - find the next hop in the route */
hop = orte_routed.get_route(&peer->recv_msg->hdr.dst);
if (hop.jobid == ORTE_JOBID_INVALID ||
hop.vpid == ORTE_VPID_INVALID) {
/* no hop known - post the error to the component
* and let the OOB see if there is another way
* to get there from here
*/
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
"%s NO ROUTE TO %s FROM HERE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&peer->name));
/* let the component know about the problem */
ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_no_route);
/* cleanup */
OBJ_RELEASE(peer->recv_msg);
return;
} else {
/* does we know how to reach the next hop? */
memcpy(&ui64, (char*)&hop, sizeof(uint64_t));
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_module.peers, ui64, (void**)&relay)) {
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
"%s ADDRESS OF NEXT HOP %s TO %s IS UNKNOWN",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&hop),
ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst));
/* let the component know about the problem */
ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_hop_unknown);
/* cleanup */
OBJ_RELEASE(peer->recv_msg);
return;
}
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
"%s ROUTING TO %s FROM HERE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&relay->name));
/* if this came from a different job family, then ensure
* we know how to return
*/
if (ORTE_JOB_FAMILY(peer->recv_msg->hdr.origin.jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
orte_routed.update_route(&(peer->recv_msg->hdr.origin), &peer->name);
}
/* post the message for retransmission */
MCA_OOB_TCP_QUEUE_RELAY(peer->recv_msg, relay);
OBJ_RELEASE(peer->recv_msg);
}
}
peer->recv_msg = NULL;
return;
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
ORTE_ERR_WOULD_BLOCK == rc) {
/* exit this event and let the event lib progress */
return;
} else {
// report the error
opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to recv message",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer->name)));
/* turn off the recv event */
opal_event_del(&peer->recv_event);
ORTE_FORCED_TERMINATE(1);
return;
}
}
break;
default:
opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer->name)),
peer->state);
// mca_oob_tcp_peer_close(peer);
break;
}
}