本文整理汇总了C语言中slurm_attr_init函数的典型用法代码示例。如果您正苦于以下问题:slurm_attr_init函数的具体用法?slurm_attr_init怎么用?slurm_attr_init使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了slurm_attr_init函数的15个代码示例(均为C语言代码,部分示例有省略),这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C代码示例。
示例1: pty_thread_create
/*
 * pty_thread_create - obtain a port from slurm_init_msg_engine_port(),
 *	record it in the job and start a detached _pty_thread for it.
 * job IN/OUT - pty_fd, pty_port and pty_id are written here
 *
 * If either socket call fails the error is logged and no thread is
 * created. If the thread cannot be created, job->pty_id is reset to 0.
 */
void pty_thread_create(srun_job_t *job)
{
	slurm_addr_t bound_addr;
	pthread_attr_t thread_attr;

	job->pty_fd = slurm_init_msg_engine_port(0);
	if (job->pty_fd < 0) {
		error("init_msg_engine_port: %m");
		return;
	}

	if (slurm_get_stream_addr(job->pty_fd, &bound_addr) < 0) {
		error("slurm_get_stream_addr: %m");
		return;
	}

	/* The port comes back in network byte order */
	job->pty_port = ntohs(((struct sockaddr_in) bound_addr).sin_port);
	debug2("initialized job control port %hu", job->pty_port);

	slurm_attr_init(&thread_attr);
	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&job->pty_id, &thread_attr, &_pty_thread,
			   (void *) job) != 0) {
		job->pty_id = 0;
		error("pthread_create(pty_thread): %m");
	}
	slurm_attr_destroy(&thread_attr);
}
示例2: msg_thr_create
/*
 * msg_thr_create - create the socket returned by _domain_socket_create(),
 *	register it with a new eio handle and start _msg_thr_internal.
 * job IN/OUT - msg_handle and msgid are written here
 * RET SLURM_SUCCESS, or SLURM_ERROR if the socket could not be created or
 *	pthread_create() kept failing after MAX_RETRIES retries
 */
int
msg_thr_create(slurmd_job_t *job)
{
	int fd;
	int thr_err;
	eio_obj_t *eio_obj;
	pthread_attr_t attr;
	int rc = SLURM_SUCCESS, retries = 0;

	errno = 0;
	fd = _domain_socket_create(conf->spooldir, conf->node_name,
				   job->jobid, job->stepid);
	if (fd == -1)
		return SLURM_ERROR;

	fd_set_nonblocking(fd);

	eio_obj = eio_obj_create(fd, &msg_socket_ops, (void *)job);
	job->msg_handle = eio_handle_create();
	eio_new_initial_obj(job->msg_handle, eio_obj);

	slurm_attr_init(&attr);
	while ((thr_err = pthread_create(&job->msgid, &attr,
					 &_msg_thr_internal, (void *)job))) {
		/*
		 * pthread_create() reports failure through its return value
		 * and leaves errno alone; copy it so "%m" prints the real
		 * cause instead of a stale errno.
		 */
		errno = thr_err;
		error("msg_thr_create: pthread_create error %m");
		if (++retries > MAX_RETRIES) {
			error("msg_thr_create: Can't create pthread");
			rc = SLURM_ERROR;
			break;
		}
		usleep(10);	/* sleep and try again */
	}
	slurm_attr_destroy(&attr);

	return rc;
}
示例3: _spawn_retry_agent
/*
 * _spawn_retry_agent - start a detached thread running agent() for the
 *	given task.
 * agent_arg_ptr IN - argument handed to agent(); a NULL pointer makes this
 *	function a no-op
 *
 * pthread_create() is retried up to MAX_RETRIES times with a 10 msec pause
 * between attempts; after that fatal() is called.
 */
static void _spawn_retry_agent(agent_arg_t * agent_arg_ptr)
{
	pthread_attr_t detached_attr;
	pthread_t tid;
	int attempts = 0;

	if (!agent_arg_ptr)
		return;

	debug2("Spawning RPC agent for msg_type %u",
	       agent_arg_ptr->msg_type);

	slurm_attr_init(&detached_attr);
	if (pthread_attr_setdetachstate(&detached_attr,
					PTHREAD_CREATE_DETACHED) != 0)
		error("pthread_attr_setdetachstate error %m");

	for (;;) {
		if (pthread_create(&tid, &detached_attr,
				   agent, (void *) agent_arg_ptr) == 0)
			break;

		error("pthread_create error %m");
		if (++attempts > MAX_RETRIES)
			fatal("Can't create pthread");
		usleep(10000);	/* pause before the next attempt */
	}

	slurm_attr_destroy(&detached_attr);
}
示例4: main
int main(int argc, char *argv[])
{
log_options_t log_opts = LOG_OPTS_INITIALIZER;
char *features, *save_ptr = NULL, *tok;
update_node_msg_t node_msg;
int rc = SLURM_SUCCESS;
hostlist_t hl = NULL;
char *node_name;
pthread_attr_t attr_work;
pthread_t thread_work = 0;
prog_name = argv[0];
_read_config();
log_opts.stderr_level = LOG_LEVEL_QUIET;
log_opts.syslog_level = LOG_LEVEL_QUIET;
if (slurm_get_debug_flags() && DEBUG_FLAG_NODE_FEATURES)
log_opts.logfile_level += 3;
(void) log_init(argv[0], log_opts, LOG_DAEMON, log_file);
/* Parse the MCDRAM and NUMA boot options */
if (argc == 3) {
features = xstrdup(argv[2]);
tok = strtok_r(features, ",", &save_ptr);
while (tok) {
printf("%s\n", tok);
if (!strcasecmp(tok, "a2a") ||
!strcasecmp(tok, "hemi") ||
!strcasecmp(tok, "quad") ||
!strcasecmp(tok, "snc2") ||
!strcasecmp(tok, "snc4")) {
xfree(mcdram_mode);
mcdram_mode = xstrdup(tok);
} else if (!strcasecmp(tok, "cache") ||
!strcasecmp(tok, "equal") ||
!strcasecmp(tok, "flat")) {
xfree(numa_mode);
numa_mode = xstrdup(tok);
}
tok = strtok_r(NULL, ",", &save_ptr);
}
xfree(features);
}
/* Spawn threads to change MCDRAM and NUMA states and start node
* reboot process */
if ((hl = hostlist_create(argv[1])) == NULL) {
error("%s: Invalid hostlist (%s)", prog_name, argv[1]);
exit(2);
}
node_bitmap = bit_alloc(100000);
while ((node_name = hostlist_pop(hl))) {
slurm_mutex_lock(&thread_cnt_mutex);
while (1) {
if (thread_cnt <= MAX_THREADS) {
thread_cnt++;
break;
} else { /* wait for state change and retry */
pthread_cond_wait(&thread_cnt_cond,
&thread_cnt_mutex);
}
}
slurm_mutex_unlock(&thread_cnt_mutex);
slurm_attr_init(&attr_work);
(void) pthread_attr_setdetachstate
(&attr_work, PTHREAD_CREATE_DETACHED);
if (pthread_create(&thread_work, &attr_work, _node_update,
(void *) node_name)) {
_node_update((void *) node_name);
}
slurm_attr_destroy(&attr_work);
}
/* Wait for work threads to complete */
slurm_mutex_lock(&thread_cnt_mutex);
while (1) {
if (thread_cnt == 0)
break;
else /* wait for state change and retry */
pthread_cond_wait(&thread_cnt_cond, &thread_cnt_mutex);
}
slurm_mutex_unlock(&thread_cnt_mutex);
hostlist_destroy(hl);
xfree(mcdram_mode);
xfree(numa_mode);
/* Wait for all nodes to change state to "on" */
_wait_all_nodes_on();
if ((argc == 3) && !syscfg_path) {
slurm_init_update_node_msg(&node_msg);
node_msg.node_names = argv[1];
node_msg.features_act = argv[2];
rc = slurm_update_node(&node_msg);
}
if (rc == SLURM_SUCCESS) {
exit(0);
} else {
error("%s: slurm_update_node(\'%s\', \'%s\'): %s\n",
//.........这里部分代码省略.........
示例5: _forward_msg_internal
/*
 * _forward_msg_internal - build hl_count forward messages and start one
 *	detached _forward_thread for each of them.
 * hl IN - used only when sp_hl is NULL: one entry is shifted off per
 *	iteration and becomes that message's nodelist
 * sp_hl IN - optional array of hostlists; entry j is turned into a ranged
 *	string for message j and then destroyed
 * fwd_struct IN - stored in every message
 * header IN - orig_addr, version, flags, msg_type and body_length are
 *	copied into every message
 * timeout IN - stored in every message; a value <= 0 is replaced by
 *	slurm_get_msg_timeout() converted from seconds to milliseconds
 * hl_count IN - number of messages/threads to create
 */
static void _forward_msg_internal(hostlist_t hl, hostlist_t* sp_hl,
				  forward_struct_t *fwd_struct,
				  header_t *header, int timeout,
				  int hl_count)
{
	pthread_attr_t attr_agent;
	pthread_t thread_agent;
	int idx;

	if (timeout <= 0) {
		/* convert secs to msec */
		timeout = slurm_get_msg_timeout() * 1000;
	}

	for (idx = 0; idx < hl_count; idx++) {
		forward_msg_t *msg;
		char *nodelist;
		int attempts = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate(&attr_agent,
						PTHREAD_CREATE_DETACHED) != 0)
			error("pthread_attr_setdetachstate error %m");

		msg = xmalloc(sizeof(forward_msg_t));
		msg->fwd_struct = fwd_struct;
		msg->timeout = timeout;

		/* Header fields inherited from the message being forwarded */
		memcpy(&msg->header.orig_addr, &header->orig_addr,
		       sizeof(slurm_addr_t));
		msg->header.version = header->version;
		msg->header.flags = header->flags;
		msg->header.msg_type = header->msg_type;
		msg->header.body_length = header->body_length;
		msg->header.ret_list = NULL;
		msg->header.ret_cnt = 0;

		if (sp_hl) {
			nodelist = hostlist_ranged_string_xmalloc(sp_hl[idx]);
			hostlist_destroy(sp_hl[idx]);
		} else {
			char *shifted = hostlist_shift(hl);

			nodelist = xstrdup(shifted);
			free(shifted);
		}

		forward_init(&msg->header.forward, NULL);
		msg->header.forward.nodelist = nodelist;

		for (;;) {
			if (pthread_create(&thread_agent, &attr_agent,
					   _forward_thread,
					   (void *)msg) == 0)
				break;

			error("pthread_create error %m");
			if (++attempts > MAX_RETRIES)
				fatal("Can't create pthread");
			usleep(100000);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
	}
}
示例6: acct_gather_profile_startpoll
extern int acct_gather_profile_startpoll(char *freq, char *freq_def)
{
int retval = SLURM_SUCCESS;
pthread_attr_t attr;
int i;
uint32_t profile = ACCT_GATHER_PROFILE_NOT_SET;
if (acct_gather_profile_init() < 0)
return SLURM_ERROR;
if (acct_gather_profile_running) {
error("acct_gather_profile_startpoll: poll already started!");
return retval;
}
acct_gather_profile_running = true;
(*(ops.get))(ACCT_GATHER_PROFILE_RUNNING, &profile);
xassert(profile != ACCT_GATHER_PROFILE_NOT_SET);
for (i=0; i < PROFILE_CNT; i++) {
memset(&acct_gather_profile_timer[i], 0,
sizeof(acct_gather_profile_timer_t));
pthread_cond_init(&acct_gather_profile_timer[i].notify, NULL);
slurm_mutex_init(&acct_gather_profile_timer[i].notify_mutex);
switch (i) {
case PROFILE_ENERGY:
if (!(profile & ACCT_GATHER_PROFILE_ENERGY))
break;
_set_freq(i, freq, freq_def);
acct_gather_energy_startpoll(
acct_gather_profile_timer[i].freq);
break;
case PROFILE_TASK:
/* Always set up the task (always first) to be
done since it is used to control memory
consumption and such. It will check
profile inside it's plugin.
*/
_set_freq(i, freq, freq_def);
jobacct_gather_startpoll(
acct_gather_profile_timer[i].freq);
break;
case PROFILE_FILESYSTEM:
if (!(profile & ACCT_GATHER_PROFILE_LUSTRE))
break;
_set_freq(i, freq, freq_def);
acct_gather_filesystem_startpoll(
acct_gather_profile_timer[i].freq);
break;
case PROFILE_NETWORK:
if (!(profile & ACCT_GATHER_PROFILE_NETWORK))
break;
_set_freq(i, freq, freq_def);
acct_gather_infiniband_startpoll(
acct_gather_profile_timer[i].freq);
break;
default:
fatal("Unhandled profile option %d please update "
"slurm_acct_gather_profile.c "
"(acct_gather_profile_startpoll)", i);
}
}
/* create polling thread */
slurm_attr_init(&attr);
if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
error("pthread_attr_setdetachstate error %m");
if (pthread_create(&timer_thread_id, &attr,
&_timer_thread, NULL)) {
debug("acct_gather_profile_startpoll failed to create "
"_timer_thread: %m");
} else
debug3("acct_gather_profile_startpoll dynamic logging enabled");
slurm_attr_destroy(&attr);
return retval;
}
示例7: main
/* main - slurmdbd main function, start various threads and process RPCs */
int main(int argc, char *argv[])
{
pthread_attr_t thread_attr;
char node_name[128];
void *db_conn = NULL;
assoc_init_args_t assoc_init_arg;
_init_config();
log_init(argv[0], log_opts, LOG_DAEMON, NULL);
if (read_slurmdbd_conf())
exit(1);
_parse_commandline(argc, argv);
_update_logging(true);
_update_nice();
if (slurm_auth_init(NULL) != SLURM_SUCCESS) {
fatal("Unable to initialize %s authentication plugin",
slurmdbd_conf->auth_type);
}
if (slurm_acct_storage_init(NULL) != SLURM_SUCCESS) {
fatal("Unable to initialize %s accounting storage plugin",
slurmdbd_conf->storage_type);
}
_kill_old_slurmdbd();
if (foreground == 0)
_daemonize();
/*
* Need to create pidfile here in case we setuid() below
* (init_pidfile() exits if it can't initialize pid file).
* On Linux we also need to make this setuid job explicitly
* able to write a core dump.
* This also has to happen after daemon(), which closes all fd's,
* so we keep the write lock of the pidfile.
*/
_init_pidfile();
_become_slurm_user();
if (foreground == 0)
_set_work_dir();
log_config();
#ifdef PR_SET_DUMPABLE
if (prctl(PR_SET_DUMPABLE, 1) < 0)
debug ("Unable to set dumpable to 1");
#endif /* PR_SET_DUMPABLE */
if (xsignal_block(dbd_sigarray) < 0)
error("Unable to block signals");
/* Create attached thread for signal handling */
slurm_attr_init(&thread_attr);
if (pthread_create(&signal_handler_thread, &thread_attr,
_signal_handler, NULL))
fatal("pthread_create %m");
slurm_attr_destroy(&thread_attr);
registered_clusters = list_create(NULL);
slurm_attr_init(&thread_attr);
if (pthread_create(&commit_handler_thread, &thread_attr,
_commit_handler, NULL))
fatal("pthread_create %m");
slurm_attr_destroy(&thread_attr);
memset(&assoc_init_arg, 0, sizeof(assoc_init_args_t));
/* If we are tacking wckey we need to cache
wckeys, if we aren't only cache the users, qos */
assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER | ASSOC_MGR_CACHE_QOS;
if (slurmdbd_conf->track_wckey)
assoc_init_arg.cache_level |= ASSOC_MGR_CACHE_WCKEY;
db_conn = acct_storage_g_get_connection(NULL, 0, true, NULL);
if (assoc_mgr_init(db_conn, &assoc_init_arg, errno) == SLURM_ERROR) {
error("Problem getting cache of data");
acct_storage_g_close_connection(&db_conn);
goto end_it;
}
if (gethostname_short(node_name, sizeof(node_name)))
fatal("getnodename: %m");
while (1) {
if (slurmdbd_conf->dbd_backup &&
(!strcmp(node_name, slurmdbd_conf->dbd_backup) ||
!strcmp(slurmdbd_conf->dbd_backup, "localhost"))) {
info("slurmdbd running in background mode");
have_control = false;
backup = true;
/* make sure any locks are released */
acct_storage_g_commit(db_conn, 1);
run_dbd_backup();
if (!shutdown_time)
assoc_mgr_refresh_lists(db_conn);
} else if (slurmdbd_conf->dbd_host &&
(!strcmp(slurmdbd_conf->dbd_host, node_name) ||
!strcmp(slurmdbd_conf->dbd_host, "localhost"))) {
backup = false;
have_control = true;
//.........这里部分代码省略.........
示例8: _fed_job_will_run
/*
 * _fed_job_will_run - query every cluster of a federation that has a
 *	control host, one thread per cluster, and keep the response with the
 *	earliest start time.
 * req IN - job description handed to each _load_willrun_thread
 * will_run_resp OUT - response with the smallest start_time, or NULL if
 *	no thread produced a response message
 * fed IN - federation record whose cluster_list is iterated
 * RET SLURM_SUCCESS if a response was found, otherwise SLURM_FAILURE
 *	(slurm errno is set from the rc of responses that carry no message)
 */
static int _fed_job_will_run(job_desc_msg_t *req,
			     will_run_response_msg_t **will_run_resp,
			     slurmdb_federation_rec_t *fed)
{
	List resp_msg_list;
	int pthread_count = 0, i;
	pthread_t *load_thread = NULL;
	load_willrun_req_struct_t *load_args;
	pthread_attr_t load_attr;
	ListIterator iter;
	will_run_response_msg_t *earliest_resp = NULL;
	load_willrun_resp_struct_t *tmp_resp;
	slurmdb_cluster_rec_t *cluster;

	xassert(req);
	xassert(will_run_resp);

	slurm_attr_init(&load_attr);

	*will_run_resp = NULL;

	/* Spawn one pthread per cluster to collect job information */
	resp_msg_list = list_create(NULL);
	/*
	 * One thread id per cluster at most. The element type is pthread_t;
	 * the array was previously sized with sizeof(pthread_attr_t).
	 */
	load_thread = xmalloc(sizeof(pthread_t) *
			      list_count(fed->cluster_list));
	iter = list_iterator_create(fed->cluster_list);
	while ((cluster = (slurmdb_cluster_rec_t *)list_next(iter))) {
		int retries = 0;

		if ((cluster->control_host == NULL) ||
		    (cluster->control_host[0] == '\0'))
			continue;	/* Cluster down */

		load_args = xmalloc(sizeof(load_willrun_req_struct_t));
		load_args->cluster = cluster;
		load_args->req = req;
		load_args->resp_msg_list = resp_msg_list;
		while (pthread_create(&load_thread[pthread_count], &load_attr,
				      _load_willrun_thread,
				      (void *)load_args)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			usleep(10000);	/* sleep and retry */
		}
		pthread_count++;
	}
	list_iterator_destroy(iter);
	slurm_attr_destroy(&load_attr);

	/* Wait for all pthreads to complete */
	for (i = 0; i < pthread_count; i++)
		pthread_join(load_thread[i], NULL);
	xfree(load_thread);

	iter = list_iterator_create(resp_msg_list);
	while ((tmp_resp = (load_willrun_resp_struct_t *)list_next(iter))) {
		if (!tmp_resp->willrun_resp_msg)
			slurm_seterrno(tmp_resp->rc);
		else if ((!earliest_resp) ||
			 (tmp_resp->willrun_resp_msg->start_time <
			  earliest_resp->start_time)) {
			/* New best candidate: drop the old one, take this */
			slurm_free_will_run_response_msg(earliest_resp);
			earliest_resp = tmp_resp->willrun_resp_msg;
			tmp_resp->willrun_resp_msg = NULL;
		}

		/* Frees a response that lost; the pointer is NULL if kept */
		slurm_free_will_run_response_msg(tmp_resp->willrun_resp_msg);
		xfree(tmp_resp);
	}
	list_iterator_destroy(iter);
	FREE_NULL_LIST(resp_msg_list);

	*will_run_resp = earliest_resp;

	if (!earliest_resp)
		return SLURM_FAILURE;

	return SLURM_SUCCESS;
}
示例9: free_block_list
/*
 * free_block_list - move the records of track_list onto a private list and
 *	hand that list to _track_freeing_blocks(), either inline or in a
 *	detached thread.
 * job_id IN - stored in the request given to _track_freeing_blocks()
 * track_list IN/OUT - its contents are transferred away with
 *	list_transfer(); NULL or empty means nothing to do
 * destroy IN - stored in the request given to _track_freeing_blocks()
 * wait IN - true: call _track_freeing_blocks() directly; false: run it in
 *	a detached thread
 * RET SLURM_SUCCESS
 *
 * NOTE: block_state_mutex should be unlocked before calling this.
 */
extern int free_block_list(uint32_t job_id, List track_list,
			   bool destroy, bool wait)
{
	bg_free_block_list_t *free_req;
	bg_record_t *rec = NULL;
	ListIterator rec_iter = NULL;
	pthread_attr_t attr_agent;
	pthread_t thread_agent;
	int attempts;

	if (!track_list || !list_count(track_list))
		return SLURM_SUCCESS;

	free_req = xmalloc(sizeof(bg_free_block_list_t));
	free_req->track_list = list_create(NULL);
	free_req->destroy = destroy;
	free_req->job_id = job_id;

	slurm_mutex_lock(&block_state_mutex);
	list_transfer(free_req->track_list, track_list);
	rec_iter = list_iterator_create(free_req->track_list);
	for (rec = list_next(rec_iter); rec; rec = list_next(rec_iter)) {
		if (rec->magic != BLOCK_MAGIC) {
			error("block was already destroyed %p", rec);
			continue;
		}
		rec->free_cnt++;

		if (rec->job_ptr && !IS_JOB_FINISHED(rec->job_ptr)) {
			info("We are freeing a block (%s) that has job %u(%u).",
			     rec->bg_block_id,
			     rec->job_ptr->job_id,
			     rec->job_running);
			/*
			 * This is not thread safe if called from
			 * bg_job_place.c anywhere from within
			 * submit_job() or at startup.
			 */
			slurm_mutex_unlock(&block_state_mutex);
			bg_requeue_job(rec->job_ptr->job_id, 0);
			slurm_mutex_lock(&block_state_mutex);
		}

		if (remove_from_bg_list(bg_lists->job_running, rec)
		    == SLURM_SUCCESS)
			num_unused_cpus += rec->cpu_cnt;
	}
	list_iterator_destroy(rec_iter);
	slurm_mutex_unlock(&block_state_mutex);

	if (wait) {
		/*
		 * _track_freeing_blocks waits until the list is done
		 * and frees the memory of the request.
		 */
		_track_freeing_blocks(free_req);
		return SLURM_SUCCESS;
	}

	/* _track_freeing_blocks handles cleanup */
	slurm_attr_init(&attr_agent);
	if (pthread_attr_setdetachstate(&attr_agent,
					PTHREAD_CREATE_DETACHED) != 0)
		error("pthread_attr_setdetachstate error %m");

	attempts = 0;
	for (;;) {
		if (pthread_create(&thread_agent, &attr_agent,
				   _track_freeing_blocks, free_req) == 0)
			break;

		error("pthread_create error %m");
		if (++attempts > MAX_PTHREAD_RETRIES)
			fatal("Can't create "
			      "pthread");
		/* sleep and retry */
		usleep(1000);
	}
	slurm_attr_destroy(&attr_agent);

	return SLURM_SUCCESS;
}
示例10: basil_request
/*
* basil_request - issue BASIL request and parse response
* @bp: method-dependent parse data to guide the parsing process
*
* Returns 0 if ok, a negative %basil_error otherwise.
*/
int basil_request(struct basil_parse_data *bp)
{
int to_child, from_child;
int ec, i, rc = -BE_UNKNOWN;
FILE *apbasil;
pid_t pid = -1;
pthread_t thread;
pthread_attr_t attr;
int time_it_out = 1;
DEF_TIMERS;
if (log_sel == -1)
_init_log_config();
if (!cray_conf->apbasil) {
error("No alps client defined");
return 0;
}
if ((cray_conf->apbasil_timeout == 0) ||
(cray_conf->apbasil_timeout == (uint16_t) NO_VAL)) {
debug2("No ApbasilTimeout configured (%u)",
cray_conf->apbasil_timeout);
time_it_out = 0;
} else {
slurm_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
}
assert(bp->version < BV_MAX);
assert(bp->method > BM_none && bp->method < BM_MAX);
START_TIMER;
for (i = 0; ((i < 10) && (pid < 0)); i++) {
if (i)
usleep(100000);
pid = popen2(cray_conf->apbasil, &to_child, &from_child, true);
}
if (pid < 0)
fatal("popen2(\"%s\", ...)", cray_conf->apbasil);
if (time_it_out) {
pthread_create(&thread, &attr, _timer_func, (void*)&pid);
}
/* write out request */
apbasil = fdopen(to_child, "w");
if (apbasil == NULL)
fatal("fdopen(): %s", strerror(errno));
setlinebuf(apbasil);
_write_xml(apbasil, "<?xml version=\"1.0\"?>\n"
"<BasilRequest protocol=\"%s\" method=\"%s\" ",
bv_names[bp->version], bm_names[bp->method]);
switch (bp->method) {
case BM_engine:
_write_xml(apbasil, "type=\"ENGINE\"/>");
break;
case BM_inventory:
_write_xml(apbasil, "type=\"INVENTORY\"/>");
break;
case BM_reserve:
_write_xml(apbasil, ">\n");
_rsvn_write_reserve_xml(apbasil, bp->mdata.res, bp->version);
break;
case BM_confirm:
if (bp->version == BV_1_0 && *bp->mdata.res->batch_id != '\0')
_write_xml(apbasil, "job_name=\"%s\" ",
bp->mdata.res->batch_id);
_write_xml(apbasil, "reservation_id=\"%u\" %s=\"%llu\"/>\n",
bp->mdata.res->rsvn_id,
bp->version >= BV_3_1 ? "pagg_id" : "admin_cookie",
(unsigned long long)bp->mdata.res->pagg_id);
break;
case BM_release:
_write_xml(apbasil, "reservation_id=\"%u\"/>\n",
bp->mdata.res->rsvn_id);
break;
case BM_switch:
{
char *suspend = bp->mdata.res->suspended ? "OUT" : "IN";
_write_xml(apbasil, ">\n");
_write_xml(apbasil, " <ReservationArray>\n");
_write_xml(apbasil, " <Reservation reservation_id=\"%u\" "
"action=\"%s\"/>\n",
bp->mdata.res->rsvn_id, suspend);
_write_xml(apbasil, " </ReservationArray>\n");
_write_xml(apbasil, "</BasilRequest>\n");
}
break;
default: /* ignore BM_none, BM_MAX, and BM_UNKNOWN covered above */
break;
}
//.........这里部分代码省略.........
示例11: _agent
static void *_agent(void *x)
{
struct agent_arg *args = (struct agent_arg *) x;
kvs_comm_set_t *kvs_set;
struct msg_arg *msg_args;
struct kvs_hosts *kvs_host_list;
int i, j, kvs_set_cnt = 0, host_cnt, pmi_fanout = 32;
int msg_sent = 0, max_forward = 0;
char *tmp, *fanout_off_host;
pthread_t msg_id;
pthread_attr_t attr;
DEF_TIMERS;
tmp = getenv("PMI_FANOUT");
if (tmp) {
pmi_fanout = atoi(tmp);
if (pmi_fanout < 1)
pmi_fanout = 32;
}
fanout_off_host = getenv("PMI_FANOUT_OFF_HOST");
/* only send one message to each host,
* build table of the ports on each host */
START_TIMER;
slurm_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
kvs_set = xmalloc(sizeof(kvs_comm_set_t) * args->barrier_xmit_cnt);
for (i=0; i<args->barrier_xmit_cnt; i++) {
if (args->barrier_xmit_ptr[i].port == 0)
continue; /* already sent message to host */
kvs_host_list = xmalloc(sizeof(struct kvs_hosts) * pmi_fanout);
host_cnt = 0;
/* This code enables key-pair forwarding between
* tasks. First task on the node gets the key-pairs
* with host/port information for all other tasks on
* that node it should forward the information to. */
for (j=(i+1); j<args->barrier_xmit_cnt; j++) {
if (args->barrier_xmit_ptr[j].port == 0)
continue; /* already sent message */
if ((fanout_off_host == NULL) &&
strcmp(args->barrier_xmit_ptr[i].hostname,
args->barrier_xmit_ptr[j].hostname))
continue; /* another host */
kvs_host_list[host_cnt].task_id = 0; /* not avail */
kvs_host_list[host_cnt].port =
args->barrier_xmit_ptr[j].port;
kvs_host_list[host_cnt].hostname =
args->barrier_xmit_ptr[j].hostname;
args->barrier_xmit_ptr[j].port = 0;/* don't reissue */
host_cnt++;
if (host_cnt >= pmi_fanout)
break;
}
msg_sent++;
max_forward = MAX(host_cnt, max_forward);
slurm_mutex_lock(&agent_mutex);
while (agent_cnt >= agent_max_cnt)
pthread_cond_wait(&agent_cond, &agent_mutex);
agent_cnt++;
slurm_mutex_unlock(&agent_mutex);
msg_args = xmalloc(sizeof(struct msg_arg));
msg_args->bar_ptr = &args->barrier_xmit_ptr[i];
msg_args->kvs_ptr = &kvs_set[kvs_set_cnt];
kvs_set[kvs_set_cnt].host_cnt = host_cnt;
kvs_set[kvs_set_cnt].kvs_host_ptr = kvs_host_list;
kvs_set[kvs_set_cnt].kvs_comm_recs = args->kvs_xmit_cnt;
kvs_set[kvs_set_cnt].kvs_comm_ptr = args->kvs_xmit_ptr;
kvs_set_cnt++;
if (agent_max_cnt == 1) {
/* TotalView slows down a great deal for
* pthread_create() calls, so just send the
* messages inline when TotalView is in use
* or for some other reason we only want
* one pthread. */
_msg_thread((void *) msg_args);
} else if (pthread_create(&msg_id, &attr, _msg_thread,
(void *) msg_args)) {
fatal("pthread_create: %m");
}
}
verbose("Sent KVS info to %d nodes, up to %d tasks per node",
msg_sent, (max_forward+1));
/* wait for completion of all outgoing message */
slurm_mutex_lock(&agent_mutex);
while (agent_cnt > 0)
pthread_cond_wait(&agent_cond, &agent_mutex);
slurm_mutex_unlock(&agent_mutex);
slurm_attr_destroy(&attr);
/* Release allocated memory */
for (i=0; i<kvs_set_cnt; i++)
xfree(kvs_set[i].kvs_host_ptr);
xfree(kvs_set);
for (i=0; i<args->barrier_xmit_cnt; i++)
//.........这里部分代码省略.........
示例12: _load_fed_parts
/*
 * _load_fed_parts - collect partition information from every cluster of a
 *	federation that has a control host, one joinable thread per cluster,
 *	and merge the replies into a single message.
 * req_msg IN - request handed to each _load_part_thread
 * part_info_msg_pptr OUT - merged partition message, or NULL if no
 *	response was collected
 * show_flags IN - handed to each _load_part_thread
 * cluster_name IN - not referenced in this function
 * fed IN - federation record whose cluster_list is iterated
 * RET SLURM_PROTOCOL_SUCCESS if at least one response was merged;
 *	otherwise the result of slurm_seterrno_ret(SLURM_ERROR)
 */
static int _load_fed_parts(slurm_msg_t *req_msg,
			   partition_info_msg_t **part_info_msg_pptr,
			   uint16_t show_flags, char *cluster_name,
			   slurmdb_federation_rec_t *fed)
{
	int cluster_inx = 0, i;
	load_part_resp_struct_t *part_resp;
	partition_info_msg_t *orig_msg = NULL, *new_msg = NULL;
	uint32_t new_rec_cnt;
	slurmdb_cluster_rec_t *cluster;
	ListIterator iter;
	pthread_attr_t load_attr;
	int pthread_count = 0;
	pthread_t *load_thread = NULL;
	load_part_req_struct_t *load_args;
	List resp_msg_list;

	*part_info_msg_pptr = NULL;

	/* Spawn one pthread per cluster to collect partition information */
	resp_msg_list = list_create(NULL);
	/*
	 * One thread id per cluster at most. The element type is pthread_t;
	 * the array was previously sized with sizeof(pthread_attr_t).
	 */
	load_thread = xmalloc(sizeof(pthread_t) *
			      list_count(fed->cluster_list));
	iter = list_iterator_create(fed->cluster_list);
	while ((cluster = (slurmdb_cluster_rec_t *) list_next(iter))) {
		int retries = 0;

		if ((cluster->control_host == NULL) ||
		    (cluster->control_host[0] == '\0'))
			continue;	/* Cluster down */

		load_args = xmalloc(sizeof(load_part_req_struct_t));
		load_args->cluster = cluster;
		/* Remembered so the replies can be sorted back into order */
		load_args->cluster_inx = cluster_inx++;
		load_args->req_msg = req_msg;
		load_args->resp_msg_list = resp_msg_list;
		load_args->show_flags = show_flags;
		slurm_attr_init(&load_attr);
		if (pthread_attr_setdetachstate(&load_attr,
						PTHREAD_CREATE_JOINABLE))
			error("pthread_attr_setdetachstate error %m");
		while (pthread_create(&load_thread[pthread_count], &load_attr,
				      _load_part_thread, (void *) load_args)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			usleep(10000);	/* sleep and retry */
		}
		pthread_count++;
		slurm_attr_destroy(&load_attr);
	}
	list_iterator_destroy(iter);

	/* Wait for all pthreads to complete */
	for (i = 0; i < pthread_count; i++)
		pthread_join(load_thread[i], NULL);
	xfree(load_thread);

	/* Maintain a consistent cluster/node ordering */
	list_sort(resp_msg_list, _sort_by_cluster_inx);

	/* Merge the responses into a single response message */
	iter = list_iterator_create(resp_msg_list);
	while ((part_resp = (load_part_resp_struct_t *) list_next(iter))) {
		new_msg = part_resp->new_msg;
		if (!orig_msg) {
			/* The first reply becomes the message returned */
			orig_msg = new_msg;
			*part_info_msg_pptr = orig_msg;
		} else {
			/* Merge the partition records */
			orig_msg->last_update = MIN(orig_msg->last_update,
						    new_msg->last_update);
			new_rec_cnt = orig_msg->record_count +
				      new_msg->record_count;
			if (new_msg->record_count) {
				orig_msg->partition_array =
					xrealloc(orig_msg->partition_array,
						 sizeof(partition_info_t) *
						 new_rec_cnt);
				(void) memcpy(orig_msg->partition_array +
					      orig_msg->record_count,
					      new_msg->partition_array,
					      sizeof(partition_info_t) *
					      new_msg->record_count);
				orig_msg->record_count = new_rec_cnt;
			}
			/*
			 * The records were copied by value above, so only
			 * the containers of the merged message are freed.
			 */
			xfree(new_msg->partition_array);
			xfree(new_msg);
		}
		xfree(part_resp);
	}
	list_iterator_destroy(iter);
	FREE_NULL_LIST(resp_msg_list);

	if (!orig_msg)
		slurm_seterrno_ret(SLURM_ERROR);

	return SLURM_PROTOCOL_SUCCESS;
}
示例13: _xlate_before
/*
 * _xlate_before - translate a "before*" dependency of the submitted job
 *	into the matching "after*" dependency on each job it names.
 * depend IN - "<type>:<job_id>[:<job_id>...]" where type is before,
 *	beforeany, beforenotok or beforeok; the string is modified in place
 *	by strtok_r()
 * submit_uid IN - user submitting the job; must own each target job or
 *	pass validate_super_user()
 * my_job_id IN - id of the submitted job, written into the new dependency
 *
 * Every accepted target job gets "<after-type>:<my_job_id>" appended to
 * its dependency string and a detached _dep_agent thread is started for
 * it. Rejected targets are only logged.
 */
static void _xlate_before(char *depend, uint32_t submit_uid, uint32_t my_job_id)
{
	uint32_t job_id;
	char *last_ptr = NULL, *new_dep = NULL, *tok, *type;
	struct job_record *job_ptr;
	pthread_attr_t attr;
	pthread_t dep_thread;
	int thr_rc;

	tok = strtok_r(depend, ":", &last_ptr);
	if (!xstrcmp(tok, "before"))
		type = "after";
	else if (!xstrcmp(tok, "beforeany"))
		type = "afterany";
	else if (!xstrcmp(tok, "beforenotok"))
		type = "afternotok";
	else if (!xstrcmp(tok, "beforeok"))
		type = "afterok";
	else {
		info("%s: discarding invalid job dependency option %s",
		     plugin_type, tok);
		return;
	}

	/* NOTE: We are updating a job record here in order to implement
	 * the depend=before option. We are doing so without the write lock
	 * on the job record, but using a local mutex to prevent multiple
	 * updates on the same job when multiple jobs satisfying the dependency
	 * are being processed at the same time (all with read locks). The
	 * job read lock will prevent anyone else from getting a job write
	 * lock and using a job write lock causes serious performance problems
	 * for slow job_submit plugins. Not an ideal solution, but the best
	 * option that we see. */
	slurm_mutex_lock(&depend_mutex);
	tok = strtok_r(NULL, ":", &last_ptr);
	while (tok) {
		job_id = atoi(tok);
		job_ptr = find_job_record(job_id);
		if (!job_ptr) {
			info("%s: discarding invalid job dependency before %s",
			     plugin_type, tok);
		} else if ((submit_uid != job_ptr->user_id) &&
			   !validate_super_user(submit_uid)) {
			error("%s: Security violation: uid %u trying to alter "
			      "job %u belonging to uid %u",
			      plugin_type, submit_uid, job_ptr->job_id,
			      job_ptr->user_id);
		} else if ((!IS_JOB_PENDING(job_ptr)) ||
			   (job_ptr->details == NULL)) {
			info("%s: discarding job before dependency on "
			     "non-pending job %u",
			     plugin_type, job_ptr->job_id);
		} else {
			/* Keep any existing dependency and append ours */
			if (job_ptr->details->dependency) {
				xstrcat(new_dep, job_ptr->details->dependency);
				xstrcat(new_dep, ",");
			}
			xstrfmtcat(new_dep, "%s:%u", type, my_job_id);
			xfree(job_ptr->details->dependency);
			job_ptr->details->dependency = new_dep;
			new_dep = NULL;
			_decr_depend_cnt(job_ptr);

			slurm_attr_init(&attr);
			pthread_attr_setdetachstate(&attr,
						    PTHREAD_CREATE_DETACHED);
			/*
			 * pthread_create() returns an error number on
			 * failure; report it instead of silently going on
			 * without a _dep_agent thread for this job.
			 */
			thr_rc = pthread_create(&dep_thread, &attr,
						_dep_agent, job_ptr);
			if (thr_rc) {
				error("%s: pthread_create failed (rc=%d), "
				      "no dependency agent started for job %u",
				      plugin_type, thr_rc, job_ptr->job_id);
			}
			slurm_attr_destroy(&attr);
		}
		tok = strtok_r(NULL, ":", &last_ptr);
	}
	slurm_mutex_unlock(&depend_mutex);
}
示例14: main
/*
 * main - update the nodes named by the hostlist expression in argv[1].
 *
 * _update_all_nodes() is tried first for the whole expression. If it
 * returns non-zero, the expression is expanded and _node_update() is run
 * once per node, each in a detached thread (or inline if the thread cannot
 * be created). The program exits with 0 once thread_cnt drops to zero, or
 * with 2 if the hostlist expression is invalid.
 *
 * NOTE(review): argv[1] is used without checking argc - confirm that the
 * caller always supplies it.
 */
int main(int argc, char *argv[])
{
	log_options_t log_opts = LOG_OPTS_INITIALIZER;
	hostlist_t hl = NULL;
	char *node_name;
	pthread_attr_t attr_work;
	pthread_t thread_work = 0;

	xstrfmtcat(prog_name, "%s[%u]", argv[0], (uint32_t) getpid());
	_read_config();
	log_opts.stderr_level = LOG_LEVEL_QUIET;
	log_opts.syslog_level = LOG_LEVEL_QUIET;
	/*
	 * Test the NODE_FEATURES bit with a bitwise AND. The former logical
	 * AND was true whenever any debug flag at all was set.
	 */
	if (slurm_get_debug_flags() & DEBUG_FLAG_NODE_FEATURES)
		log_opts.logfile_level += 3;
	(void) log_init(argv[0], log_opts, LOG_DAEMON, log_file);

	/* Attempt to shutdown all nodes in a single capmc call,
	 * attempt to shutdown individual nodes only if that fails. */
	if (_update_all_nodes(argv[1]) != 0) {
		if ((hl = hostlist_create(argv[1])) == NULL) {
			error("%s: Invalid hostlist (%s)", prog_name, argv[1]);
			exit(2);
		}
		while ((node_name = hostlist_pop(hl))) {
			/* Throttle: wait for a free slot before spawning */
			slurm_mutex_lock(&thread_cnt_mutex);
			while (1) {
				if (thread_cnt <= MAX_THREADS) {
					thread_cnt++;
					break;
				} else {	/* wait for state change and retry */
					pthread_cond_wait(&thread_cnt_cond,
							  &thread_cnt_mutex);
				}
			}
			slurm_mutex_unlock(&thread_cnt_mutex);

			slurm_attr_init(&attr_work);
			(void) pthread_attr_setdetachstate
				(&attr_work, PTHREAD_CREATE_DETACHED);
			if (pthread_create(&thread_work, &attr_work,
					   _node_update, (void *) node_name)) {
				/* No thread available: do the work inline */
				_node_update((void *) node_name);
			}
			slurm_attr_destroy(&attr_work);
		}
		hostlist_destroy(hl);
	}

	/* Wait for work threads to complete */
	slurm_mutex_lock(&thread_cnt_mutex);
	while (1) {
		if (thread_cnt == 0)
			break;
		else	/* wait for state change and retry */
			pthread_cond_wait(&thread_cnt_cond, &thread_cnt_mutex);
	}
	slurm_mutex_unlock(&thread_cnt_mutex);

	xfree(prog_name);
	exit(0);
}
示例15: _build_sinfo_data
/*
* _build_sinfo_data - make a sinfo_data entry for each unique node
* configuration and add it to the sinfo_list for later printing.
* sinfo_list IN/OUT - list of unique sinfo_data records to report
* partition_msg IN - partition info message
* node_msg IN - node info message
* RET zero or error code
*/
static int _build_sinfo_data(List sinfo_list,
partition_info_msg_t *partition_msg,
node_info_msg_t *node_msg)
{
pthread_attr_t attr_sinfo;
pthread_t thread_sinfo;
build_part_info_t *build_struct_ptr;
node_info_t *node_ptr = NULL;
partition_info_t *part_ptr = NULL;
int j;
g_node_scaling = node_msg->node_scaling;
/* by default every partition is shown, even if no nodes */
if ((!params.node_flag) && params.match_flags.partition_flag) {
part_ptr = partition_msg->partition_array;
for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
if ((!params.partition) ||
(_strcmp(params.partition, part_ptr->name) == 0)) {
list_append(sinfo_list, _create_sinfo(
part_ptr, (uint16_t) j,
NULL,
node_msg->node_scaling));
}
}
}
if (params.filtering) {
for (j = 0; j < node_msg->record_count; j++) {
node_ptr = &(node_msg->node_array[j]);
if (node_ptr->name && _filter_out(node_ptr))
xfree(node_ptr->name);
}
}
/* make sinfo_list entries for every node in every partition */
for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
part_ptr = &(partition_msg->partition_array[j]);
if (params.filtering && params.partition &&
_strcmp(part_ptr->name, params.partition))
continue;
if (node_msg->record_count == 1) { /* node_name_single */
int pos = -1;
uint16_t subgrp_size = 0;
hostlist_t hl;
node_ptr = &(node_msg->node_array[0]);
if ((node_ptr->name == NULL) ||
(part_ptr->nodes == NULL))
continue;
hl = hostlist_create(part_ptr->nodes);
pos = hostlist_find(hl, node_msg->node_array[0].name);
hostlist_destroy(hl);
if (pos < 0)
continue;
if (select_g_select_nodeinfo_get(
node_ptr->select_nodeinfo,
SELECT_NODEDATA_SUBGRP_SIZE,
0,
&subgrp_size) == SLURM_SUCCESS
&& subgrp_size) {
_handle_subgrps(sinfo_list,
(uint16_t) j,
part_ptr,
node_ptr,
node_msg->
node_scaling);
} else {
_insert_node_ptr(sinfo_list,
(uint16_t) j,
part_ptr,
node_ptr,
node_msg->
node_scaling);
}
continue;
}
/* Process each partition using a separate thread */
build_struct_ptr = xmalloc(sizeof(build_part_info_t));
build_struct_ptr->node_msg = node_msg;
build_struct_ptr->part_num = (uint16_t) j;
build_struct_ptr->part_ptr = part_ptr;
build_struct_ptr->sinfo_list = sinfo_list;
slurm_mutex_lock(&sinfo_cnt_mutex);
sinfo_cnt++;
slurm_mutex_unlock(&sinfo_cnt_mutex);
slurm_attr_init(&attr_sinfo);
//.........这里部分代码省略.........