Hello, I migrated from Slurm 16 to Slurm 17.11.4, but my node features plugin no longer works: I apply my feature and reboot the node, but once the machine has rebooted, Slurm puts my node in the drain state.
With sinfo -r I get this error message: Failed to reboot nodes machine415 into expected state for job 945. My problem is located in the Slurm source, in the file slurm-17.11.4/src/slurmctld/job_scheduler.c at line 4222: static void *_wait_boot(void *arg) { if (boot_node_bitmap && bit_set_count(boot_node_bitmap)) { char *node_list = bitmap2node_name(boot_node_bitmap); error("Failed to reboot nodes %s into expected state for job %u", node_list, job_ptr->job_id); (void) drain_nodes(node_list, "Node mode change failure", getuid()); xfree(node_list); (void) job_requeue(getuid(), job_ptr->job_id, NULL, false, 0); } When I comment out this code, my node features plugin works! However, I would rather solve the problem by modifying my plugin code, but I don't know what to change. What should I save as a variable in my code, and should I allocate it with xmalloc or xrealloc? I am sharing my basic source code below. I think I should save active_features when the function node_features_p_node_set is called, but how can I save this variable? I tried to save *active_features into char **current_mode, but I get the same error. Thanks in advance for your help. #include "config.h" #define _GNU_SOURCE /* For POLLRDHUP */ #include <ctype.h> #include <poll.h> #include <signal.h> #include <stdlib.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> #if defined(__FreeBSD__) || defined(__NetBSD__) #define POLLRDHUP POLLHUP #endif #include "slurm/slurm.h" #include "src/common/assoc_mgr.h" #include "src/common/bitstring.h" #include "src/common/fd.h" #include "src/common/gres.h" #include "src/common/list.h" #include "src/common/macros.h" #include "src/common/pack.h" #include "src/common/parse_config.h" #include "src/common/slurm_protocol_api.h" //#include "src/common/slurm_strcasestr.h" #include "src/common/timers.h" #include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/slurmctld/job_scheduler.h" #include "src/slurmctld/locks.h" #include "src/slurmctld/node_scheduler.h" #include
"src/slurmctld/reservation.h" #include "src/slurmctld/slurmctld.h" #include "src/slurmctld/state_save.h" const char plugin_name[] = "node_features update_linux plugin"; const char plugin_type[] = "node_features/update_linux"; const uint32_t plugin_version = SLURM_VERSION_NUMBER; extern int node_features_p_get_config() { } extern bool node_features_p_node_update_valid(void *node_ptr, update_node_msg_t *update_node_msg) { return true; } extern uint32_t node_features_p_boot_time(void) { uint32_t boot_time = (6 * 60); /* 6 minute estimated boot time */ return boot_time; } extern bool node_features_p_changible_feature(char *feature) { return true; } extern bool node_features_p_changeable_feature(char *feature) { return true; } extern void node_features_p_step_config(bool mem_sort, bitstr_t *numa_bitmap) { } extern char *node_features_p_node_xlate2(char *new_features) { return new_features; } /* Load configuration */ extern int init(void) { debug("init_new plugin*********************************************************************************"); int rc = SLURM_SUCCESS; return rc; } extern int fini(void) { debug("fini_*********************************************************************************"); return SLURM_SUCCESS; } extern int node_features_p_get_node(char *node_list) { debug("node_features_p_get_node_*********************************************************************************"); return SLURM_SUCCESS; } extern char *node_features_p_node_xlate(char *new_features, char *orig_features, char *avail_features) { return avail_features; } extern void node_features_p_node_state(char **avail_modes, char **current_mode) { debug("node_features_p_node_state_*********************************************************************************"); } /* Translate a job's feature request to the node features needed at boot time */ extern char *node_features_p_job_xlate(char *job_features) { 
debug("node_features_p_job_xlate_*********************************************************************************"); return job_features; } /* Test if a job's feature specification is valid */ extern int node_features_p_job_valid(char *job_features) { debug("node_features_p_job_valid_*********************************************************************************"); return SLURM_SUCCESS; } bool node_features_g_node_update_valid(void *node_ptr, update_node_msg_t *update_node_msg) { return true; } /* Set's the node's active features based upon job constraints. * NOTE: Executed by the slurmd daemon. * IN active_features - New active features * RET error code */ extern int node_features_p_node_set(char *active_features) { debug("node_features_p_node_set_*********************************************************************************"); int error_code = SLURM_SUCCESS; active_features[0] = '\0'; return error_code; } /* Return true if the plugin requires PowerSave mode for booting nodes */ extern bool node_features_p_node_power(void) { debug("node_features_p_node_power_*********************************************************************************"); return false; } extern int node_features_p_node_update(char *active_features, bitstr_t *node_bitmap) { debug("_node_features_p_node_update********************************************************************************"); return SLURM_SUCCESS; } /* Reload configuration */ extern int node_features_p_reconfig(void) { debug("_node_features_p_reconfig********************************************************************************"); return 1; } /* Determine if the specified user can modify the currently available node * features */ extern bool node_features_p_user_update(uid_t uid) { debug("_node_features_p_user_update********************************************************************************"); return true; }