本文整理汇总了C++中slurm_mutex_unlock函数的典型用法代码示例。如果您正苦于以下问题:C++ slurm_mutex_unlock函数的具体用法?C++ slurm_mutex_unlock怎么用?C++ slurm_mutex_unlock使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了slurm_mutex_unlock函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: _atfork_child
/*
 * Child-side fork handler (presumably registered via pthread_atfork —
 * confirm at registration site): the child starts with only the forking
 * thread, so release log_lock in case another thread held it at fork time.
 */
static void _atfork_child()
{
	slurm_mutex_unlock(&log_lock);
}
示例2: _start_agent
/* Perform job initiation work */
static void _start_agent(bg_action_t *bg_action_ptr)
{
int rc, set_user_rc = SLURM_SUCCESS;
bg_record_t *bg_record = NULL;
bg_record_t *found_record = NULL;
ListIterator itr;
List delete_list = NULL;
int requeue_job = 0;
uint32_t req_job_id = bg_action_ptr->job_ptr->job_id;
bool block_inited = 0;
bool delete_it = 0;
slurm_mutex_lock(&block_state_mutex);
bg_record = find_bg_record_in_list(bg_lists->main,
bg_action_ptr->bg_block_id);
if (!bg_record) {
bg_record->modifying = 0;
slurm_mutex_unlock(&block_state_mutex);
error("block %s not found in bg_lists->main",
bg_action_ptr->bg_block_id);
bg_requeue_job(req_job_id, 1, 0, JOB_BOOT_FAIL, false);
return;
}
if ((bg_record->job_running <= NO_JOB_RUNNING)
&& !find_job_in_bg_record(bg_record, req_job_id)) {
bg_record->modifying = 0;
// bg_reset_block(bg_record); should already happened
slurm_mutex_unlock(&block_state_mutex);
debug("job %u finished during the queueing job "
"(everything is ok)",
req_job_id);
return;
}
if ((bg_record->state == BG_BLOCK_TERM) || bg_record->free_cnt) {
/* It doesn't appear state of a small block
(conn_type) is held on a BGP system so
if we to reset it so, just set the reboot flag and
handle it later in that code. */
bg_action_ptr->reboot = 1;
}
delete_list = list_create(NULL);
itr = list_iterator_create(bg_lists->main);
while ((found_record = list_next(itr))) {
if (bg_record == found_record)
continue;
if (!blocks_overlap(bg_record, found_record)) {
debug2("block %s isn't part of %s",
found_record->bg_block_id,
bg_record->bg_block_id);
continue;
}
if (found_record->job_ptr
|| (found_record->job_list
&& list_count(found_record->job_list))) {
struct job_record *job_ptr = found_record->job_ptr;
if (!found_record->job_ptr)
job_ptr = find_job_in_bg_record(
found_record, NO_VAL);
error("Trying to start job %u on block %s, "
"but there is a job %u running on an overlapping "
"block %s it will not end until %ld. "
"This should never happen.",
req_job_id,
bg_record->bg_block_id,
job_ptr->job_id,
found_record->bg_block_id,
job_ptr->end_time);
requeue_job = 1;
break;
}
debug2("need to make sure %s is free, it's part of %s",
found_record->bg_block_id,
bg_record->bg_block_id);
list_push(delete_list, found_record);
}
list_iterator_destroy(itr);
if (requeue_job) {
FREE_NULL_LIST(delete_list);
bg_reset_block(bg_record, bg_action_ptr->job_ptr);
bg_record->modifying = 0;
slurm_mutex_unlock(&block_state_mutex);
bg_requeue_job(req_job_id, 0, 0, JOB_BOOT_FAIL, false);
return;
}
slurm_mutex_unlock(&block_state_mutex);
if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
delete_it = 1;
//.........这里部分代码省略.........
示例3: sync_jobs
//.........这里部分代码省略.........
if (IS_JOB_COMPLETING(job_ptr))
bg_action_ptr->op = TERM_OP;
else
bg_action_ptr->op = START_OP;
bg_action_ptr->job_ptr = job_ptr;
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_BLOCK_ID,
&(bg_action_ptr->bg_block_id));
#ifdef HAVE_BG_L_P
# ifdef HAVE_BGL
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_BLRTS_IMAGE,
&(bg_action_ptr->blrtsimage));
# else
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_CONN_TYPE,
&(bg_action_ptr->conn_type));
# endif
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_LINUX_IMAGE,
&(bg_action_ptr->linuximage));
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_RAMDISK_IMAGE,
&(bg_action_ptr->ramdiskimage));
#endif
get_select_jobinfo(job_ptr->select_jobinfo->data,
SELECT_JOBDATA_MLOADER_IMAGE,
&(bg_action_ptr->mloaderimage));
if (bg_action_ptr->bg_block_id == NULL) {
error("Running job %u has bgblock==NULL",
job_ptr->job_id);
} else if (job_ptr->nodes == NULL) {
error("Running job %u has nodes==NULL",
job_ptr->job_id);
} else if (!(bg_record = find_bg_record_in_list(
bg_lists->main,
bg_action_ptr->bg_block_id))) {
error("Kill job %u belongs to defunct "
"bgblock %s",
job_ptr->job_id,
bg_action_ptr->bg_block_id);
}
if (!bg_record) {
/* Can't fail it just now, we have locks in
place. */
bg_status_add_job_kill_list(job_ptr, &kill_list);
_destroy_bg_action(bg_action_ptr);
continue;
}
/* _sync_agent will destroy the bg_action_ptr */
_sync_agent(bg_action_ptr, bg_record);
}
list_iterator_destroy(itr);
block_list = list_create(destroy_bg_record);
itr = list_iterator_create(bg_lists->main);
while ((bg_record = list_next(itr))) {
bg_record_t *rm_record;
if (bg_record->job_ptr
|| (bg_record->job_list
&& list_count(bg_record->job_list)))
continue;
rm_record = xmalloc(sizeof(bg_record_t));
rm_record->magic = BLOCK_MAGIC;
rm_record->bg_block_id = xstrdup(bg_record->bg_block_id);
rm_record->mp_str = xstrdup(bg_record->mp_str);
list_append(block_list, rm_record);
}
list_iterator_destroy(itr);
slurm_mutex_unlock(&block_state_mutex);
if (kill_list) {
/* slurmctld is already locked up, so handle this right after
* the unlock of block_state_mutex.
*/
bg_status_process_kill_job_list(kill_list, JOB_BOOT_FAIL, 1);
FREE_NULL_LIST(kill_list);
}
/* Insure that all other blocks are free of users */
if (block_list) {
itr = list_iterator_create(block_list);
while ((bg_record = list_next(itr))) {
info("Queue clearing of users of BG block %s",
bg_record->bg_block_id);
term_jobs_on_block(bg_record->bg_block_id);
}
list_iterator_destroy(itr);
FREE_NULL_LIST(block_list);
} else {
/* this should never happen,
* vestigial logic */
error("sync_jobs: no block_list");
return SLURM_ERROR;
}
return SLURM_SUCCESS;
}
示例4: bit_alloc
/*
* init_power_save - Initialize the power save module. Started as a
* pthread. Terminates automatically at slurmctld shutdown time.
* Input and output are unused.
*/
static void *_init_power_save(void *arg)
{
/* Locks: Read nodes */
slurmctld_lock_t node_read_lock = {
NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
/* Locks: Write nodes */
slurmctld_lock_t node_write_lock = {
NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
time_t now, boot_time = 0, last_power_scan = 0;
/* Load power-save configuration; bail out if it is invalid/disabled. */
if (_init_power_config())
goto fini;
suspend_node_bitmap = bit_alloc(node_record_count);
resume_node_bitmap = bit_alloc(node_record_count);
/* Main loop: poll once per second until slurmctld shuts down. */
while (slurmctld_config.shutdown_time == 0) {
sleep(1);
/* NOTE(review): _reap_procs() appears to return available
 * child-process slots; < 2 presumably means the suspend/resume
 * programs are backlogged -- confirm against _reap_procs(). */
if (_reap_procs() < 2) {
debug("power_save programs getting backlogged");
continue;
}
/* If slurm.conf changed and the new configuration is invalid
 * for power save, disable the module entirely. */
if ((last_config != slurmctld_conf.last_update) &&
(_init_power_config())) {
info("power_save mode has been disabled due to "
"configuration changes");
goto fini;
}
now = time(NULL);
if (boot_time == 0)
boot_time = now;
/* Only run every 60 seconds or after a node state change,
* whichever happens first */
if ((last_node_update >= last_power_scan) ||
(now >= (last_power_scan + 60))) {
lock_slurmctld(node_write_lock);
_do_power_work(now);
unlock_slurmctld(node_write_lock);
last_power_scan = now;
}
/* Once half of slurmd_timeout has elapsed since startup,
 * re-wake nodes one time, then disable this check by pushing
 * boot_time a year ahead and zeroing slurmd_timeout. */
if (slurmd_timeout &&
(now > (boot_time + (slurmd_timeout / 2)))) {
lock_slurmctld(node_read_lock);
_re_wake();
unlock_slurmctld(node_read_lock);
/* prevent additional executions */
boot_time += (365 * 24 * 60 * 60);
slurmd_timeout = 0;
}
}
fini: _clear_power_config();
FREE_NULL_BITMAP(suspend_node_bitmap);
FREE_NULL_BITMAP(resume_node_bitmap);
_shutdown_power();
/* Publish the disabled state under power_mutex so readers see it. */
slurm_mutex_lock(&power_mutex);
power_save_enabled = false;
slurm_mutex_unlock(&power_mutex);
pthread_exit(NULL);
return NULL; /* not reached; satisfies the void* return type */
}
示例5: as_mysql_user_no_assocs_or_no_uid
//.........这里部分代码省略.........
char *query = NULL;
MYSQL_RES *result = NULL;
MYSQL_ROW row;
List use_cluster_list = as_mysql_cluster_list;
ListIterator itr = NULL;
char *cluster_name = NULL;
xassert(ret_list);
query = xstrdup_printf("select name from %s where deleted=0",
user_table);
if (assoc_cond &&
assoc_cond->user_list && list_count(assoc_cond->user_list)) {
int set = 0;
char *object = NULL;
xstrcat(query, " && (");
itr = list_iterator_create(assoc_cond->user_list);
while ((object = list_next(itr))) {
if (set)
xstrcat(query, " || ");
xstrfmtcat(query, "name='%s'", object);
set = 1;
}
list_iterator_destroy(itr);
xstrcat(query, ")");
}
if (!(result = mysql_db_query_ret(
mysql_conn, query, 0))) {
xfree(query);
return SLURM_ERROR;
}
xfree(query);
if (assoc_cond &&
assoc_cond->cluster_list && list_count(assoc_cond->cluster_list))
use_cluster_list = assoc_cond->cluster_list;
else
slurm_mutex_lock(&as_mysql_cluster_list_lock);
itr = list_iterator_create(use_cluster_list);
while ((row = mysql_fetch_row(result))) {
MYSQL_RES *result2 = NULL;
int cnt = 0;
slurmdb_assoc_rec_t *assoc = NULL;
uid_t pw_uid;
if (uid_from_string (row[0], &pw_uid) < 0) {
assoc = xmalloc(sizeof(slurmdb_assoc_rec_t));
list_append(ret_list, assoc);
assoc->id = SLURMDB_PROBLEM_USER_NO_UID;
assoc->user = xstrdup(row[0]);
continue;
}
/* See if we have at least 1 association in the system */
while ((cluster_name = list_next(itr))) {
if (query)
xstrcat(query, " union ");
xstrfmtcat(query,
"select distinct id_assoc from \"%s_%s\" "
"where deleted=0 && "
"user='%s'",
cluster_name, assoc_table, row[0]);
}
list_iterator_reset(itr);
if (query)
xstrcat(query, " limit 1");
if (!(result2 = mysql_db_query_ret(
mysql_conn, query, 0))) {
xfree(query);
rc = SLURM_ERROR;
break;
}
xfree(query);
cnt = mysql_num_rows(result2);
mysql_free_result(result2);
if (cnt)
continue;
assoc = xmalloc(sizeof(slurmdb_assoc_rec_t));
list_append(ret_list, assoc);
assoc->id = SLURMDB_PROBLEM_USER_NO_ASSOC;
assoc->user = xstrdup(row[0]);
}
mysql_free_result(result);
list_iterator_destroy(itr);
if (use_cluster_list == as_mysql_cluster_list)
slurm_mutex_unlock(&as_mysql_cluster_list_lock);
return rc;
}
示例6: _get_process_data
/*
* _get_process_data() - Build a table of all current processes
*
* IN: pid.
*
* OUT: none
*
* THREADSAFE! Only one thread ever gets here.
*
* Assumption:
* Any file with a name of the form "/proc/[0-9]+/stat"
* is a Linux-style stat entry. We disregard the data if they look
* wrong.
*/
static void _get_process_data(void)
{
struct procsinfo proc;
pid_t *pids = NULL;
int npids = 0;
int i;
uint32_t total_job_mem = 0, total_job_vsize = 0;
int pid = 0;
static int processing = 0;
prec_t *prec = NULL;
struct jobacctinfo *jobacct = NULL;
List prec_list = NULL;
ListIterator itr;
ListIterator itr2;
if (!pgid_plugin && (cont_id == (uint64_t)NO_VAL)) {
debug("cont_id hasn't been set yet not running poll");
return;
}
if(processing) {
debug("already running, returning");
return;
}
processing = 1;
prec_list = list_create(_destroy_prec);
if(!pgid_plugin) {
/* get only the processes in the proctrack container */
slurm_container_get_pids(cont_id, &pids, &npids);
if (!npids) {
debug4("no pids in this container %"PRIu64"", cont_id);
goto finished;
}
for (i = 0; i < npids; i++) {
pid = pids[i];
if(!getprocs(&proc, sizeof(proc), 0, 0, &pid, 1))
continue; /* Assume the process went away */
prec = xmalloc(sizeof(prec_t));
list_append(prec_list, prec);
prec->pid = proc.pi_pid;
prec->ppid = proc.pi_ppid;
prec->usec = proc.pi_ru.ru_utime.tv_sec +
proc.pi_ru.ru_utime.tv_usec * 1e-6;
prec->ssec = proc.pi_ru.ru_stime.tv_sec +
proc.pi_ru.ru_stime.tv_usec * 1e-6;
prec->pages = proc.pi_majflt;
prec->rss = (proc.pi_trss + proc.pi_drss) * pagesize;
//prec->rss *= 1024;
prec->vsize = (proc.pi_tsize / 1024);
prec->vsize += (proc.pi_dvm * pagesize);
//prec->vsize *= 1024;
/* debug("vsize = %f = (%d/1024)+(%d*%d)", */
/* prec->vsize, proc.pi_tsize, proc.pi_dvm, pagesize); */
}
} else {
while(getprocs(&proc, sizeof(proc), 0, 0, &pid, 1) == 1) {
prec = xmalloc(sizeof(prec_t));
list_append(prec_list, prec);
prec->pid = proc.pi_pid;
prec->ppid = proc.pi_ppid;
prec->usec = proc.pi_ru.ru_utime.tv_sec +
proc.pi_ru.ru_utime.tv_usec * 1e-6;
prec->ssec = proc.pi_ru.ru_stime.tv_sec +
proc.pi_ru.ru_stime.tv_usec * 1e-6;
prec->pages = proc.pi_majflt;
prec->rss = (proc.pi_trss + proc.pi_drss) * pagesize;
//prec->rss *= 1024;
prec->vsize = (proc.pi_tsize / 1024);
prec->vsize += (proc.pi_dvm * pagesize);
//prec->vsize *= 1024;
/* debug("vsize = %f = (%d/1024)+(%d*%d)", */
/* prec->vsize, proc.pi_tsize, proc.pi_dvm, pagesize); */
}
}
if(!list_count(prec_list))
goto finished;
slurm_mutex_lock(&jobacct_lock);
if(!task_list || !list_count(task_list)) {
slurm_mutex_unlock(&jobacct_lock);
goto finished;
}
itr = list_iterator_create(task_list);
while((jobacct = list_next(itr))) {
//.........这里部分代码省略.........
示例7: _check_for_booted_overlapping_blocks
static int _check_for_booted_overlapping_blocks(
List block_list, ListIterator bg_record_itr,
bg_record_t *bg_record, int overlap_check, List overlapped_list,
uint16_t query_mode)
{
bg_record_t *found_record = NULL;
ListIterator itr = NULL;
int rc = 0;
int overlap = 0;
bool is_test = SELECT_IS_TEST(query_mode);
/* this test only is for actually picking a block not testing */
if (is_test && bg_conf->layout_mode == LAYOUT_DYNAMIC)
return rc;
/* Make sure no other blocks are under this block
are booted and running jobs
*/
itr = list_iterator_create(block_list);
while ((found_record = (bg_record_t*)list_next(itr)) != NULL) {
if ((!found_record->bg_block_id)
|| (bg_record == found_record)) {
if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
info("Don't need to look at myself %s %s",
bg_record->bg_block_id,
found_record->bg_block_id);
continue;
}
slurm_mutex_lock(&block_state_mutex);
overlap = blocks_overlap(bg_record, found_record);
slurm_mutex_unlock(&block_state_mutex);
if (overlap) {
overlap = 0;
/* make the available time on this block
* (bg_record) the max of this found_record's job
* or the one already set if in overlapped_block_list
* since we aren't setting job_running we
* don't have to remove them since the
* block_list should always be destroyed afterwards.
*/
if (is_test && overlapped_list
&& found_record->job_ptr
&& bg_record->job_running == NO_JOB_RUNNING) {
ListIterator itr = list_iterator_create(
overlapped_list);
bg_record_t *tmp_rec = NULL;
if (bg_conf->slurm_debug_flags
& DEBUG_FLAG_BG_PICK)
info("found overlapping block %s "
"overlapped %s with job %u",
found_record->bg_block_id,
bg_record->bg_block_id,
found_record->job_ptr->job_id);
while ((tmp_rec = list_next(itr))) {
if (tmp_rec == bg_record)
break;
}
list_iterator_destroy(itr);
if (tmp_rec && tmp_rec->job_ptr->end_time
< found_record->job_ptr->end_time)
tmp_rec->job_ptr =
found_record->job_ptr;
else if (!tmp_rec) {
bg_record->job_ptr =
found_record->job_ptr;
list_append(overlapped_list,
bg_record);
}
}
/* We already know this block doesn't work
* right now so we will if there is another
* overlapping block that ends later
*/
if (rc)
continue;
/* This test is here to check if the block we
* chose is not booted or if there is a block
* overlapping that we could avoid freeing if
* we choose something else
*/
if (bg_conf->layout_mode == LAYOUT_OVERLAP
&& ((overlap_check == 0 && bg_record->state
!= BG_BLOCK_INITED)
|| (overlap_check == 1 && found_record->state
!= BG_BLOCK_FREE))) {
if (!is_test) {
rc = 1;
break;
}
}
if (((bg_conf->layout_mode == LAYOUT_DYNAMIC)
|| ((!SELECT_IS_CHECK_FULL_SET(query_mode)
|| SELECT_IS_MODE_RUN_NOW(query_mode))
&& (bg_conf->layout_mode != LAYOUT_DYNAMIC)))
//.........这里部分代码省略.........
示例8: proctrack_p_get_pids
/*
 * proctrack_p_get_pids - look up the PIDs in container cont_id by calling
 *	the lua function "proctrack_g_get_pids".
 * cont_id IN - container ID, passed through to the lua function
 * pids OUT   - xmalloc()'d array of PIDs on success; caller must xfree
 * npids OUT  - number of entries in *pids (left 0 on failure)
 * RET SLURM_SUCCESS if the lua call returned a table, else SLURM_ERROR
 */
int proctrack_p_get_pids (uint64_t cont_id, pid_t **pids, int *npids)
{
int rc = SLURM_ERROR;
int i = 0;
int t = 0;
pid_t *p;
*npids = 0;
/* lua_lock serializes all access to the shared lua state L. */
slurm_mutex_lock (&lua_lock);
lua_getglobal (L, "proctrack_g_get_pids");
/* NOTE(review): on this and the other goto-out paths the value pushed
 * onto the lua stack is not popped -- confirm the stack is rebalanced
 * elsewhere, otherwise it grows across failed calls. */
if (lua_isnil (L, -1))
goto out;
lua_pushnumber (L, cont_id);
if (lua_pcall (L, 1, 1, 0) != 0) {
error ("%s: %s: %s",
"proctrack/lua",
__func__,
lua_tostring (L, -1));
goto out;
}
/*
* list of PIDs should be returned in a table from the lua
* script. If a table wasn't returned then generate an error:
*/
if (!lua_istable(L, -1)) {
error ("%s: %s: function should return a table",
"proctrack/lua",
__func__);
goto out;
}
/*
* Save absolute position of table in lua stack
*/
t = lua_gettop (L);
/*
* Get table size and create array for slurm
*/
*npids = lua_objlen (L, t);
p = (pid_t *) xmalloc (*npids * sizeof (pid_t));
/*
* Traverse table/array at position t on the stack:
*/
lua_pushnil (L);
while (lua_next (L, t)) {
p [i++] = lua_tonumber (L, -1);
/*
* pop value off stack, leave key for lua_next()
*/
lua_pop (L, 1);
}
/* Pop the table itself, restoring the stack. */
lua_pop (L, 1);
*pids = p;
rc = SLURM_SUCCESS;
out:
slurm_mutex_unlock (&lua_lock);
return rc;
}
示例9: core_spec_g_init
/*
* Initialize the core specialization plugin.
*
* RET - slurm error code
*/
extern int core_spec_g_init(void)
{
int retval = SLURM_SUCCESS;
char *plugin_type = "core_spec";
char *core_spec_plugin_type = NULL;
char *last = NULL, *core_spec_plugin_list, *core_spec = NULL;
/* Fast path: plugin context already set up. */
if (init_run && (g_core_spec_context_num >= 0))
return retval;
slurm_mutex_lock(&g_core_spec_context_lock);
/* Re-check under the lock in case another thread initialized first. */
if (g_core_spec_context_num >= 0)
goto done;
core_spec_plugin_type = slurm_get_core_spec_plugin();
g_core_spec_context_num = 0; /* mark it before anything else */
/* No plugin configured: nothing to load, still a success. */
if ((core_spec_plugin_type == NULL) ||
(core_spec_plugin_type[0] == '\0'))
goto done;
core_spec_plugin_list = core_spec_plugin_type;
/* Create one plugin context per comma-separated plugin name. */
while ((core_spec =
strtok_r(core_spec_plugin_list, ",", &last))) {
xrealloc(ops,
sizeof(core_spec_ops_t) *
(g_core_spec_context_num + 1));
xrealloc(g_core_spec_context, (sizeof(plugin_context_t *)
* (g_core_spec_context_num + 1)));
/* Accept both "core_spec/foo" and bare "foo" names. */
if (xstrncmp(core_spec, "core_spec/", 10) == 0)
core_spec += 10; /* backward compatibility */
core_spec = xstrdup_printf("core_spec/%s",
core_spec);
g_core_spec_context[g_core_spec_context_num] =
plugin_context_create(
plugin_type, core_spec,
(void **)&ops[g_core_spec_context_num],
syms, sizeof(syms));
if (!g_core_spec_context[g_core_spec_context_num]) {
error("cannot create %s context for %s",
plugin_type, core_spec);
xfree(core_spec);
retval = SLURM_ERROR;
break;
}
xfree(core_spec);
g_core_spec_context_num++;
core_spec_plugin_list = NULL; /* for next iteration */
}
init_run = true;
done:
slurm_mutex_unlock(&g_core_spec_context_lock);
xfree(core_spec_plugin_type);
/* Roll back any partially created contexts on failure. */
if (retval != SLURM_SUCCESS)
core_spec_g_fini();
return retval;
}
示例10: _cancel_step_id
/*
 * _cancel_step_id - Thread entry point: signal or terminate a single job
 *	step, retrying while the job is in a transitional state.
 * ci IN - pointer to a job_cancel_info_t describing the target step.
 *	This function xfree()s the struct and its job_id_str before
 *	returning, but deliberately leaves the shared mutex and condition
 *	variable it references intact for the caller.
 * RET NULL always.
 */
static void *
_cancel_step_id (void *ci)
{
	int error_code = SLURM_SUCCESS, i;
	job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci;
	uint32_t job_id = cancel_info->job_id;
	uint32_t step_id = cancel_info->step_id;
	bool sig_set = true;
	DEF_TIMERS;

	/* No explicit signal given: terminate the step with SIGKILL. */
	if (cancel_info->sig == (uint16_t) NO_VAL) {
		cancel_info->sig = SIGKILL;
		sig_set = false;
	}

	/* Build a printable job ID ("jobid", "arrayid_taskid" or
	 * "arrayid_*") if the caller did not supply one. */
	if (!cancel_info->job_id_str) {
		if (cancel_info->array_job_id &&
		    (cancel_info->array_task_id == INFINITE)) {
			xstrfmtcat(cancel_info->job_id_str, "%u_*",
				   cancel_info->array_job_id);
		} else if (cancel_info->array_job_id) {
			xstrfmtcat(cancel_info->job_id_str, "%u_%u",
				   cancel_info->array_job_id,
				   cancel_info->array_task_id);
		} else {
			xstrfmtcat(cancel_info->job_id_str, "%u",
				   cancel_info->job_id);
		}
	}

	for (i = 0; i < MAX_CANCEL_RETRY; i++) {
		if (cancel_info->sig == SIGKILL) {
			verbose("Terminating step %s.%u",
				cancel_info->job_id_str, step_id);
		} else {
			verbose("Signal %u to step %s.%u",
				cancel_info->sig,
				cancel_info->job_id_str, step_id);
		}

		_add_delay();
		START_TIMER;
		if ((!sig_set) || opt.ctld)
			error_code = slurm_kill_job_step(job_id, step_id,
							 cancel_info->sig);
		else if (cancel_info->sig == SIGKILL)
			error_code = slurm_terminate_job_step(job_id, step_id);
		else
			error_code = slurm_signal_job_step(job_id, step_id,
							   cancel_info->sig);
		END_TIMER;

		/* Track the slowest RPC response time for reporting. */
		slurm_mutex_lock(&max_delay_lock);
		max_resp_time = MAX(max_resp_time, DELTA_TIMER);
		slurm_mutex_unlock(&max_delay_lock);

		if ((error_code == 0) ||
		    ((errno != ESLURM_TRANSITION_STATE_NO_UPDATE) &&
		     (errno != ESLURM_JOB_PENDING)))
			break;
		/* Fixed typo in log message: was "transistional" */
		verbose("Job is in transitional state, retrying");
		sleep(5 + i);
	}

	if (error_code) {
		error_code = slurm_get_errno();
		if ((opt.verbose > 0) || (error_code != ESLURM_ALREADY_DONE))
			error("Kill job error on job step id %s: %s",
			      cancel_info->job_id_str,
			      slurm_strerror(slurm_get_errno()));

		if ((error_code == ESLURM_ALREADY_DONE) &&
		    (cancel_info->sig == SIGKILL)) {
			error_code = 0;	/* Ignore error if job done */
		}
	}

	/* Purposely free the struct passed in here, so the caller doesn't have
	 * to keep track of it, but don't destroy the mutex and condition
	 * variables contained. */
	slurm_mutex_lock(cancel_info->num_active_threads_lock);
	*(cancel_info->rc) = MAX(*(cancel_info->rc), error_code);
	(*(cancel_info->num_active_threads))--;
	slurm_cond_signal(cancel_info->num_active_threads_cond);
	slurm_mutex_unlock(cancel_info->num_active_threads_lock);

	xfree(cancel_info->job_id_str);
	xfree(cancel_info);
	return NULL;
}
示例11: _update_node_filesystem
/*
 * _update_node_filesystem - read the node's Lustre I/O counters and log
 * the delta since the previous poll (reads/writes and MB read/written)
 * as an ACCT_GATHER_PROFILE_LUSTRE profile sample.
 *
 * (Header comment previously described the IPMI energy plugin by
 * mistake; corrected to match this function.)
 *
 * RET SLURM_SUCCESS, or SLURM_FAILURE if the counters cannot be read.
 */
static int _update_node_filesystem(void)
{
	/* Cumulative totals reported so far (fls) plus the current and
	 * previous counter snapshots; all preserved across calls. */
	static acct_filesystem_data_t fls;
	static acct_filesystem_data_t current;
	static acct_filesystem_data_t previous;
	static bool first = true;
	int cc;

	/* lustre_lock protects the shared lustre_se counter structure. */
	slurm_mutex_lock(&lustre_lock);
	cc = _read_lustre_counters();
	if (cc != SLURM_SUCCESS) {
		error("%s: Cannot read lustre counters", __func__);
		slurm_mutex_unlock(&lustre_lock);
		return SLURM_FAILURE;
	}

	if (first) {
		/* First time initialize the counters and return. */
		previous.reads = lustre_se.all_lustre_nb_reads;
		previous.writes = lustre_se.all_lustre_nb_writes;
		previous.read_size
			= (double)lustre_se.all_lustre_read_bytes/1048576.0;
		previous.write_size
			= (double)lustre_se.all_lustre_write_bytes/1048576.0;
		first = false;
		memset(&lustre_se, 0, sizeof(lustre_sens_t));
		slurm_mutex_unlock(&lustre_lock);
		return SLURM_SUCCESS;
	}

	/* Compute the current values read from all lustre-xxxx
	 * directories */
	current.reads = lustre_se.all_lustre_nb_reads;
	current.writes = lustre_se.all_lustre_nb_writes;
	current.read_size = (double)lustre_se.all_lustre_read_bytes/1048576.0;
	current.write_size = (double)lustre_se.all_lustre_write_bytes/1048576.0;

	/* Now compute the difference between the two snapshots
	 * and send it to hdf5 log. */
	fls.reads = fls.reads + (current.reads - previous.reads);
	fls.writes = fls.writes + (current.writes - previous.writes);
	fls.read_size = fls.read_size
		+ (current.read_size - previous.read_size);
	fls.write_size = fls.write_size
		+ (current.write_size - previous.write_size);

	acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_LUSTRE, &fls);

	/* Save current as previous and clean up the working data
	 * structure.  BUG FIX: "&current" had been corrupted into the
	 * HTML entity mojibake "¤t", which does not compile. */
	memcpy(&previous, &current, sizeof(acct_filesystem_data_t));
	memset(&lustre_se, 0, sizeof(lustre_sens_t));

	info("%s: num reads %"PRIu64" nums write %"PRIu64" "
	     "read %f MB wrote %f MB",
	     __func__, fls.reads, fls.writes, fls.read_size, fls.write_size);

	slurm_mutex_unlock(&lustre_lock);

	return SLURM_SUCCESS;
}
示例12: _cancel_job_id
/*
 * _cancel_job_id - Thread entry point: signal or terminate one job
 *	(optionally only its batch step, the full job, or a whole job
 *	array), retrying while the job is in a transitional state.
 * ci IN - pointer to a job_cancel_info_t describing the target job.
 *	This function xfree()s the struct and its job_id_str before
 *	returning, but deliberately leaves the shared mutex and condition
 *	variable it references intact for the caller.
 * RET NULL always.
 */
static void *
_cancel_job_id (void *ci)
{
	int error_code = SLURM_SUCCESS, i;
	job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci;
	bool sig_set = true;
	uint16_t flags = 0;
	char *job_type = "";
	DEF_TIMERS;

	/* No explicit signal given: terminate the job with SIGKILL. */
	if (cancel_info->sig == (uint16_t) NO_VAL) {
		cancel_info->sig = SIGKILL;
		sig_set = false;
	}
	if (opt.batch) {
		flags |= KILL_JOB_BATCH;
		job_type = "batch ";
	}
	if (opt.full) {
		flags |= KILL_FULL_JOB;
		job_type = "full ";
	}
	if (cancel_info->array_flag)
		flags |= KILL_JOB_ARRAY;

	/* Build a printable job ID ("jobid", "arrayid_taskid" or
	 * "arrayid_*") if the caller did not supply one. */
	if (!cancel_info->job_id_str) {
		if (cancel_info->array_job_id &&
		    (cancel_info->array_task_id == INFINITE)) {
			xstrfmtcat(cancel_info->job_id_str, "%u_*",
				   cancel_info->array_job_id);
		} else if (cancel_info->array_job_id) {
			xstrfmtcat(cancel_info->job_id_str, "%u_%u",
				   cancel_info->array_job_id,
				   cancel_info->array_task_id);
		} else {
			xstrfmtcat(cancel_info->job_id_str, "%u",
				   cancel_info->job_id);
		}
	}

	if (!sig_set) {
		verbose("Terminating %sjob %s", job_type,
			cancel_info->job_id_str);
	} else {
		verbose("Signal %u to %sjob %s", cancel_info->sig, job_type,
			cancel_info->job_id_str);
	}

	for (i = 0; i < MAX_CANCEL_RETRY; i++) {
		_add_delay();
		START_TIMER;
		error_code = slurm_kill_job2(cancel_info->job_id_str,
					     cancel_info->sig, flags);
		END_TIMER;

		/* Track the slowest RPC response time for reporting. */
		slurm_mutex_lock(&max_delay_lock);
		max_resp_time = MAX(max_resp_time, DELTA_TIMER);
		slurm_mutex_unlock(&max_delay_lock);

		if ((error_code == 0) ||
		    (errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
			break;
		/* Fixed typo in log message: was "transistional" */
		verbose("Job is in transitional state, retrying");
		sleep(5 + i);
	}

	if (error_code) {
		error_code = slurm_get_errno();
		if ((opt.verbose > 0) ||
		    ((error_code != ESLURM_ALREADY_DONE) &&
		     (error_code != ESLURM_INVALID_JOB_ID))) {
			error("Kill job error on job id %s: %s",
			      cancel_info->job_id_str,
			      slurm_strerror(slurm_get_errno()));
		}

		if (((error_code == ESLURM_ALREADY_DONE) ||
		     (error_code == ESLURM_INVALID_JOB_ID)) &&
		    (cancel_info->sig == SIGKILL)) {
			error_code = 0;	/* Ignore error if job done */
		}
	}

	/* Purposely free the struct passed in here, so the caller doesn't have
	 * to keep track of it, but don't destroy the mutex and condition
	 * variables contained. */
	slurm_mutex_lock(cancel_info->num_active_threads_lock);
	*(cancel_info->rc) = MAX(*(cancel_info->rc), error_code);
	(*(cancel_info->num_active_threads))--;
	slurm_cond_signal(cancel_info->num_active_threads_cond);
	slurm_mutex_unlock(cancel_info->num_active_threads_lock);

	xfree(cancel_info->job_id_str);
	xfree(cancel_info);
	return NULL;
}
示例13: _cancel_jobs_by_state
/*
 * _cancel_jobs_by_state - spawn one cancellation thread per job in
 *	job_buffer_ptr whose state matches job_state.
 * job_state IN  - job state to filter on; values >= JOB_END match any state
 * filter_cnt IN - forwarded to _cancel_jobid_by_state() when specific job
 *	IDs were given on the command line
 * rc IN/OUT     - accumulates the worst error code from all threads
 */
static void
_cancel_jobs_by_state(uint32_t job_state, int filter_cnt, int *rc)
{
int i, err;
job_cancel_info_t *cancel_info;
job_info_t *job_ptr = job_buffer_ptr->job_array;
pthread_t dummy;
/* Spawn a thread to cancel each job or job step marked for
* cancellation */
if (opt.job_cnt) {
_cancel_jobid_by_state(job_state, filter_cnt, rc);
return;
}
for (i = 0; i < job_buffer_ptr->record_count; i++, job_ptr++) {
/* job_id == 0 marks a record as already handled or skipped. */
if (IS_JOB_FINISHED(job_ptr))
job_ptr->job_id = 0;
if (job_ptr->job_id == 0)
continue;
if ((job_state < JOB_END) &&
(job_ptr->job_state != job_state))
continue;
/* Interactive mode: skip jobs the user declines to cancel. */
if (opt.interactive &&
(_confirmation(job_ptr, SLURM_BATCH_SCRIPT) == 0)) {
job_ptr->job_id = 0;
continue;
}
/* Build the per-thread work descriptor; the spawned thread
 * owns and frees it (see _cancel_job_id). */
cancel_info = (job_cancel_info_t *)
xmalloc(sizeof(job_cancel_info_t));
cancel_info->job_id_str = _build_jobid_str(job_ptr);
cancel_info->rc = rc;
cancel_info->sig = opt.signal;
cancel_info->num_active_threads = &num_active_threads;
cancel_info->num_active_threads_lock =
&num_active_threads_lock;
cancel_info->num_active_threads_cond =
&num_active_threads_cond;
/* Throttle: wait until the active-thread count drops below
 * MAX_THREADS before spawning another worker. */
slurm_mutex_lock(&num_active_threads_lock);
num_active_threads++;
while (num_active_threads > MAX_THREADS) {
slurm_cond_wait(&num_active_threads_cond,
&num_active_threads_lock);
}
slurm_mutex_unlock(&num_active_threads_lock);
err = pthread_create(&dummy, &attr, _cancel_job_id,cancel_info);
if (err) /* Run in-line if thread create fails */
_cancel_job_id(cancel_info);
job_ptr->job_id = 0;
if (opt.interactive) {
/* Print any error message for first job before
* starting confirmation of next job */
slurm_mutex_lock(&num_active_threads_lock);
while (num_active_threads > 0) {
slurm_cond_wait(&num_active_threads_cond,
&num_active_threads_lock);
}
slurm_mutex_unlock(&num_active_threads_lock);
}
}
}
示例14: _cancel_jobid_by_state
static void _cancel_jobid_by_state(uint32_t job_state, int filter_cnt, int *rc)
{
job_cancel_info_t *cancel_info;
job_info_t *job_ptr;
pthread_t dummy;
int err, i, j;
if (opt.job_cnt == 0)
return;
for (j = 0; j < opt.job_cnt; j++) {
if (opt.job_id[j] == 0)
continue;
if ((job_state == JOB_PENDING) && !opt.job_pend[j])
continue;
job_ptr = job_buffer_ptr->job_array;
for (i = 0; i < job_buffer_ptr->record_count; i++, job_ptr++) {
if (IS_JOB_FINISHED(job_ptr))
job_ptr->job_id = 0;
if (job_ptr->job_id == 0)
continue;
if ((opt.step_id[j] != SLURM_BATCH_SCRIPT) &&
IS_JOB_PENDING(job_ptr)) {
/* User specified #.# for step, but the job ID
* may be job array leader with part of job
* array running with other tasks pending */
continue;
}
opt.job_found[j] = false;
if (opt.array_id[j] == NO_VAL) {
if ((opt.job_id[j] == job_ptr->job_id) ||
((opt.job_id[j] == job_ptr->array_job_id) &&
(opt.step_id[j] == SLURM_BATCH_SCRIPT))) {
opt.job_found[j] = true;
}
} else if (opt.array_id[j] == INFINITE) {
if (opt.job_id[j] == job_ptr->array_job_id) {
opt.job_found[j] = true;
}
} else if (opt.job_id[j] != job_ptr->array_job_id) {
continue;
} else if (_is_task_in_job(job_ptr, opt.array_id[j])) {
opt.job_found[j] = true;
}
if (!opt.job_found[j])
continue;
if (opt.interactive &&
(_confirmation(job_ptr, opt.step_id[j]) == 0)) {
job_ptr->job_id = 0; /* Don't check again */
continue;
}
slurm_mutex_lock(&num_active_threads_lock);
num_active_threads++;
while (num_active_threads > MAX_THREADS) {
slurm_cond_wait(&num_active_threads_cond,
&num_active_threads_lock);
}
slurm_mutex_unlock(&num_active_threads_lock);
cancel_info = (job_cancel_info_t *)
xmalloc(sizeof(job_cancel_info_t));
cancel_info->rc = rc;
cancel_info->sig = opt.signal;
cancel_info->num_active_threads = &num_active_threads;
cancel_info->num_active_threads_lock =
&num_active_threads_lock;
cancel_info->num_active_threads_cond =
&num_active_threads_cond;
if (opt.step_id[j] == SLURM_BATCH_SCRIPT) {
cancel_info->job_id_str =
_build_jobid_str(job_ptr);
err = pthread_create(&dummy, &attr,
_cancel_job_id,
cancel_info);
if (err) /* Run in-line as needed */
_cancel_job_id(cancel_info);
job_ptr->job_id = 0;
} else {
cancel_info->job_id = job_ptr->job_id;
cancel_info->step_id = opt.step_id[j];
err = pthread_create(&dummy, &attr,
_cancel_step_id,
cancel_info);
if (err) /* Run in-line as needed */
_cancel_step_id(cancel_info);
}
if (opt.interactive) {
/* Print any error message for first job before
* starting confirmation of next job */
slurm_mutex_lock(&num_active_threads_lock);
while (num_active_threads > 0) {
slurm_cond_wait(&num_active_threads_cond,
&num_active_threads_lock);
}
//.........这里部分代码省略.........
示例15: slurmdbd_conf_unlock
/* slurmdbd_conf_unlock - release the slurmdbd configuration mutex
 * (conf_mutex); pairs with the corresponding lock call. */
extern void slurmdbd_conf_unlock(void)
{
slurm_mutex_unlock(&conf_mutex);
}