/src/scontrol/scontrol.c
C | 1841 lines | 1582 code | 114 blank | 145 comment | 590 complexity | be3550085834976c42243828dca8ab2a MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
Large files files are truncated, but you can click here to view the full file
- /*****************************************************************************\
- * scontrol.c - administration tool for slurm.
- * provides an interface to read, write, and update configurations.
- *****************************************************************************
- * Copyright (C) 2002-2007 The Regents of the University of California.
- * Copyright (C) 2008-2010 Lawrence Livermore National Security.
- * Portions Copyright (C) 2008 Vijay Ramasubramanian.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Morris Jette <jette1@llnl.gov>
- * CODE-OCEC-09-009. All rights reserved.
- *
- * This file is part of SLURM, a resource management program.
- * For details, see <http://www.schedmd.com/slurmdocs/>.
- * Please also read the included file: DISCLAIMER.
- *
- * SLURM is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * In addition, as a special exception, the copyright holders give permission
- * to link the code of portions of this program with the OpenSSL library under
- * certain conditions as described in each individual source file, and
- * distribute linked combinations including the two. You must obey the GNU
- * General Public License in all respects for all of the code used other than
- * OpenSSL. If you modify file(s) with this exception, you may extend this
- * exception to your version of the file(s), but you are not obligated to do
- * so. If you do not wish to do so, delete this exception statement from your
- * version. If you delete this exception statement from all source files in
- * the program, then also delete it here.
- *
- * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with SLURM; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- \*****************************************************************************/
- #include "scontrol.h"
- #include "src/plugins/select/bluegene/bg_enums.h"
- #include "src/common/proc_args.h"
- #define OPT_LONG_HIDE 0x102
- char *command_name;
- List clusters = NULL;
- int all_flag; /* display even hidden partitions */
- int detail_flag; /* display additional details */
- int exit_code; /* scontrol's exit code, =1 on any error at any time */
- int exit_flag; /* program to terminate if =1 */
- int input_words; /* number of words of input permitted */
- int one_liner; /* one record per line if =1 */
- int quiet_flag; /* quiet=1, verbose=-1, normal=0 */
- int verbosity; /* count of "-v" options */
- uint32_t cluster_flags; /* what type of cluster are we talking to */
- block_info_msg_t *old_block_info_ptr = NULL;
- front_end_info_msg_t *old_front_end_info_ptr = NULL;
- job_info_msg_t *old_job_info_ptr = NULL;
- node_info_msg_t *old_node_info_ptr = NULL;
- partition_info_msg_t *old_part_info_ptr = NULL;
- reserve_info_msg_t *old_res_info_ptr = NULL;
- slurm_ctl_conf_info_msg_t *old_slurm_ctl_conf_ptr = NULL;
- static void _create_it (int argc, char *argv[]);
- static void _delete_it (int argc, char *argv[]);
- static void _show_it (int argc, char *argv[]);
- static int _get_command (int *argc, char *argv[]);
- static void _ping_slurmctld(char *control_machine,
- char *backup_controller);
- static void _print_config (char *config_param);
- static void _print_daemons (void);
- static void _print_aliases (char* node_hostname);
- static void _print_ping (void);
- static void _print_slurmd(char *hostlist);
- static void _print_version( void );
- static int _process_command (int argc, char *argv[]);
- static void _update_it (int argc, char *argv[]);
- static int _update_bluegene_block (int argc, char *argv[]);
- static int _update_bluegene_subbp (int argc, char *argv[]);
- static int _update_slurmctld_debug(char *val);
- static void _usage ();
- int
- main (int argc, char *argv[])
- {
- int error_code = SLURM_SUCCESS, i, opt_char, input_field_count;
- char **input_fields, *env_val;
- log_options_t opts = LOG_OPTS_STDERR_ONLY ;
- int option_index;
- static struct option long_options[] = {
- {"all", 0, 0, 'a'},
- {"cluster", 1, 0, 'M'},
- {"clusters", 1, 0, 'M'},
- {"details", 0, 0, 'd'},
- {"help", 0, 0, 'h'},
- {"hide", 0, 0, OPT_LONG_HIDE},
- {"oneliner", 0, 0, 'o'},
- {"quiet", 0, 0, 'Q'},
- {"usage", 0, 0, 'h'},
- {"verbose", 0, 0, 'v'},
- {"version", 0, 0, 'V'},
- {NULL, 0, 0, 0}
- };
- command_name = argv[0];
- all_flag = 0;
- detail_flag = 0;
- exit_code = 0;
- exit_flag = 0;
- input_field_count = 0;
- quiet_flag = 0;
- verbosity = 0;
- log_init("scontrol", opts, SYSLOG_FACILITY_DAEMON, NULL);
- if (getenv ("SCONTROL_ALL"))
- all_flag= 1;
- if ((env_val = getenv("SLURM_CLUSTERS"))) {
- if (!(clusters = slurmdb_get_info_cluster(env_val))) {
- error("'%s' can't be reached now, "
- "or it is an invalid entry for "
- "SLURM_CLUSTERS. Use 'sacctmgr --list "
- "cluster' to see available clusters.",
- env_val);
- exit(1);
- }
- working_cluster_rec = list_peek(clusters);
- }
- while((opt_char = getopt_long(argc, argv, "adhM:oQvV",
- long_options, &option_index)) != -1) {
- switch (opt_char) {
- case (int)'?':
- fprintf(stderr, "Try \"scontrol --help\" for "
- "more information\n");
- exit(1);
- break;
- case (int)'a':
- all_flag = 1;
- break;
- case (int)'d':
- detail_flag++;
- break;
- case (int)'h':
- _usage ();
- exit(exit_code);
- break;
- case OPT_LONG_HIDE:
- all_flag = 0;
- detail_flag = 0;
- break;
- case (int)'M':
- if (clusters) {
- list_destroy(clusters);
- clusters = NULL;
- working_cluster_rec = NULL;
- }
- if (!(clusters = slurmdb_get_info_cluster(optarg))) {
- error("'%s' can't be reached now, "
- "or it is an invalid entry for "
- "--cluster. Use 'sacctmgr --list "
- "cluster' to see available clusters.",
- optarg);
- exit(1);
- }
- working_cluster_rec = list_peek(clusters);
- break;
- case (int)'o':
- one_liner = 1;
- break;
- case (int)'Q':
- quiet_flag = 1;
- break;
- case (int)'v':
- quiet_flag = -1;
- verbosity++;
- break;
- case (int)'V':
- _print_version();
- exit(exit_code);
- break;
- default:
- exit_code = 1;
- fprintf(stderr, "getopt error, returned %c\n",
- opt_char);
- exit(exit_code);
- }
- }
- if (clusters && (list_count(clusters) > 1))
- fatal("Only one cluster can be used at a time with scontrol");
- cluster_flags = slurmdb_setup_cluster_flags();
- if (verbosity) {
- opts.stderr_level += verbosity;
- log_alter(opts, SYSLOG_FACILITY_USER, NULL);
- }
- if (argc > MAX_INPUT_FIELDS) /* bogus input, but continue anyway */
- input_words = argc;
- else
- input_words = 128;
- input_fields = (char **) xmalloc (sizeof (char *) * input_words);
- if (optind < argc) {
- for (i = optind; i < argc; i++) {
- input_fields[input_field_count++] = argv[i];
- }
- }
- if (input_field_count)
- exit_flag = 1;
- else
- error_code = _get_command (&input_field_count, input_fields);
- while (error_code == SLURM_SUCCESS) {
- error_code = _process_command (input_field_count,
- input_fields);
- if (error_code || exit_flag)
- break;
- error_code = _get_command (&input_field_count, input_fields);
- }
- if (clusters)
- list_destroy(clusters);
- exit(exit_code);
- }
- static void _print_version(void)
- {
- print_slurm_version();
- if (quiet_flag == -1) {
- long version = slurm_api_version();
- printf("slurm_api_version: %ld, %ld.%ld.%ld\n", version,
- SLURM_VERSION_MAJOR(version),
- SLURM_VERSION_MINOR(version),
- SLURM_VERSION_MICRO(version));
- }
- }
#if !HAVE_READLINE
/*
 * _getline - alternative to readline() if readline is not available
 * IN prompt - text written to stdout before the line is read
 * RET malloc'ed copy of the next line of stdin with its trailing newline
 *	removed (the caller must free it), or NULL on end of file, read
 *	error or allocation failure
 * NOTE: the line is read through a 4096 byte buffer, so a longer line is
 *	returned in pieces by successive calls
 */
static char *_getline(const char *prompt)
{
	char buf[4096];
	char *line;
	size_t len;

	printf("%s", prompt);
	fflush(stdout);

	/* nothing was stored in buf on EOF/error, so it must not be read */
	if (fgets(buf, sizeof(buf), stdin) == NULL)
		return NULL;

	len = strlen(buf);
	if ((len > 0) && (buf[len - 1] == '\n'))
		buf[--len] = '\0';

	line = malloc(len + 1);
	if (line == NULL)
		return NULL;
	memcpy(line, buf, len + 1);	/* includes the terminating NUL */
	return line;
}
#endif
- /*
- * _get_command - get a command from the user
- * OUT argc - location to store count of arguments
- * OUT argv - location to store the argument list
- */
- static int
- _get_command (int *argc, char **argv)
- {
- char *in_line;
- static char *last_in_line = NULL;
- int i, in_line_size;
- static int last_in_line_size = 0;
- *argc = 0;
- #if HAVE_READLINE
- in_line = readline ("scontrol: ");
- #else
- in_line = _getline("scontrol: ");
- #endif
- if (in_line == NULL)
- return 0;
- else if (strcmp (in_line, "!!") == 0) {
- free (in_line);
- in_line = last_in_line;
- in_line_size = last_in_line_size;
- } else {
- if (last_in_line)
- free (last_in_line);
- last_in_line = in_line;
- last_in_line_size = in_line_size = strlen (in_line);
- }
- #if HAVE_READLINE
- add_history(in_line);
- #endif
- /* break in_line into tokens */
- for (i = 0; i < in_line_size; i++) {
- bool double_quote = false, single_quote = false;
- if (in_line[i] == '\0')
- break;
- if (isspace ((int) in_line[i]))
- continue;
- if (((*argc) + 1) > MAX_INPUT_FIELDS) { /* bogus input line */
- exit_code = 1;
- fprintf (stderr,
- "%s: can not process over %d words\n",
- command_name, input_words);
- return E2BIG;
- }
- argv[(*argc)++] = &in_line[i];
- for (i++; i < in_line_size; i++) {
- if (in_line[i] == '\042') {
- double_quote = !double_quote;
- continue;
- }
- if (in_line[i] == '\047') {
- single_quote = !single_quote;
- continue;
- }
- if (in_line[i] == '\0')
- break;
- if (double_quote || single_quote)
- continue;
- if (isspace ((int) in_line[i])) {
- in_line[i] = '\0';
- break;
- }
- }
- }
- return 0;
- }
- /*
- * _print_config - print the specified configuration parameter and value
- * IN config_param - NULL to print all parameters and values
- */
- static void
- _print_config (char *config_param)
- {
- int error_code;
- slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL;
- if (old_slurm_ctl_conf_ptr) {
- error_code = slurm_load_ctl_conf (
- old_slurm_ctl_conf_ptr->last_update,
- &slurm_ctl_conf_ptr);
- if (error_code == SLURM_SUCCESS)
- slurm_free_ctl_conf(old_slurm_ctl_conf_ptr);
- else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
- slurm_ctl_conf_ptr = old_slurm_ctl_conf_ptr;
- error_code = SLURM_SUCCESS;
- if (quiet_flag == -1) {
- printf ("slurm_load_ctl_conf no change "
- "in data\n");
- }
- }
- }
- else
- error_code = slurm_load_ctl_conf ((time_t) NULL,
- &slurm_ctl_conf_ptr);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("slurm_load_ctl_conf error");
- }
- else
- old_slurm_ctl_conf_ptr = slurm_ctl_conf_ptr;
- if (error_code == SLURM_SUCCESS) {
- slurm_print_ctl_conf (stdout, slurm_ctl_conf_ptr) ;
- fprintf(stdout, "\n");
- }
- if (slurm_ctl_conf_ptr)
- _ping_slurmctld (slurm_ctl_conf_ptr->control_machine,
- slurm_ctl_conf_ptr->backup_controller);
- }
- /* Print slurmd status on localhost.
- * Parse hostlist in the future */
- static void _print_slurmd(char *hostlist)
- {
- slurmd_status_t *slurmd_status;
- if (slurm_load_slurmd_status(&slurmd_status)) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror("slurm_load_slurmd_status");
- } else {
- slurm_print_slurmd_status(stdout, slurmd_status);
- slurm_free_slurmd_status(slurmd_status);
- }
- }
- /* Print state of controllers only */
- static void
- _print_ping (void)
- {
- slurm_ctl_conf_info_msg_t *conf;
- char *primary, *secondary;
- slurm_conf_init(NULL);
- conf = slurm_conf_lock();
- primary = xstrdup(conf->control_machine);
- secondary = xstrdup(conf->backup_controller);
- slurm_conf_unlock();
- _ping_slurmctld (primary, secondary);
- xfree(primary);
- xfree(secondary);
- }
- /* Report if slurmctld daemons are responding */
- static void
- _ping_slurmctld(char *control_machine, char *backup_controller)
- {
- static char *state[2] = { "UP", "DOWN" };
- int primary = 1, secondary = 1;
- int down_msg = 0;
- if (slurm_ping(1) == SLURM_SUCCESS)
- primary = 0;
- if (slurm_ping(2) == SLURM_SUCCESS)
- secondary = 0;
- fprintf(stdout, "Slurmctld(primary/backup) ");
- if (control_machine || backup_controller) {
- fprintf(stdout, "at ");
- if (control_machine) {
- fprintf(stdout, "%s/", control_machine);
- if (primary)
- down_msg = 1;
- } else
- fprintf(stdout, "(NULL)/");
- if (backup_controller) {
- fprintf(stdout, "%s ", backup_controller);
- if (secondary)
- down_msg = 1;
- } else
- fprintf(stdout, "(NULL) ");
- }
- fprintf(stdout, "are %s/%s\n",
- state[primary], state[secondary]);
- if (down_msg && (getuid() == 0)) {
- fprintf(stdout, "*****************************************\n");
- fprintf(stdout, "** RESTORE SLURMCTLD DAEMON TO SERVICE **\n");
- fprintf(stdout, "*****************************************\n");
- }
- }
- /*
- * _print_daemons - report what daemons should be running on this node
- */
- static void
- _print_daemons (void)
- {
- slurm_ctl_conf_info_msg_t *conf;
- char me[MAX_SLURM_NAME], *b, *c, *n, *token, *save_ptr = NULL;
- int actld = 0, ctld = 0, d = 0;
- char daemon_list[] = "slurmctld slurmd";
- slurm_conf_init(NULL);
- conf = slurm_conf_lock();
- gethostname_short(me, MAX_SLURM_NAME);
- if ((b = conf->backup_controller)) {
- if ((strcmp(b, me) == 0) ||
- (strcasecmp(b, "localhost") == 0))
- ctld = 1;
- }
- if (conf->control_machine) {
- actld = 1;
- c = xstrdup(conf->control_machine);
- token = strtok_r(c, ",", &save_ptr);
- while (token) {
- if ((strcmp(token, me) == 0) ||
- (strcasecmp(token, "localhost") == 0)) {
- ctld = 1;
- break;
- }
- token = strtok_r(NULL, ",", &save_ptr);
- }
- xfree(c);
- }
- slurm_conf_unlock();
- if ((n = slurm_conf_get_nodename(me))) {
- d = 1;
- xfree(n);
- } else if ((n = slurm_conf_get_aliased_nodename())) {
- d = 1;
- xfree(n);
- } else if ((n = slurm_conf_get_nodename("localhost"))) {
- d = 1;
- xfree(n);
- }
- strcpy(daemon_list, "");
- if (actld && ctld)
- strcat(daemon_list, "slurmctld ");
- if (actld && d)
- strcat(daemon_list, "slurmd");
- fprintf (stdout, "%s\n", daemon_list) ;
- }
- /*
- * _print_aliases - report which aliases should be running on this node
- */
- static void
- _print_aliases (char* node_hostname)
- {
- char me[MAX_SLURM_NAME], *n = NULL, *a = NULL;
- char *s;
- slurm_conf_init(NULL);
- if (!node_hostname) {
- gethostname_short(me, MAX_SLURM_NAME);
- s = me;
- } else
- s = node_hostname;
- if (!(n = slurm_conf_get_aliases(s)) && (s == me)) {
- if (!(a = slurm_conf_get_aliased_nodename()))
- a = slurm_conf_get_nodename("localhost");
- if (a) {
- n = slurm_conf_get_aliases(a);
- xfree(a);
- }
- }
- if (n) {
- fprintf(stdout, "%s\n", n);
- xfree(n);
- }
- }
- /*
- * _reboot_nodes - issue RPC to have computing nodes reboot when idle
- * RET 0 or a slurm error code
- */
- static int _reboot_nodes(char *node_list)
- {
- slurm_ctl_conf_t *conf;
- int rc;
- slurm_msg_t msg;
- reboot_msg_t req;
- conf = slurm_conf_lock();
- if (conf->reboot_program == NULL) {
- error("RebootProgram isn't defined");
- slurm_conf_unlock();
- slurm_seterrno(SLURM_ERROR);
- return SLURM_ERROR;
- }
- slurm_conf_unlock();
- slurm_msg_t_init(&msg);
- req.node_list = node_list;
- msg.msg_type = REQUEST_REBOOT_NODES;
- msg.data = &req;
- if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0)
- return SLURM_ERROR;
- if (rc)
- slurm_seterrno_ret(rc);
- return rc;
- }
- /*
- * _process_command - process the user's command
- * IN argc - count of arguments
- * IN argv - the arguments
- * RET 0 or errno (only for errors fatal to scontrol)
- */
- static int
- _process_command (int argc, char *argv[])
- {
- int error_code = 0;
- char *tag = argv[0];
- int tag_len = 0;
- if (argc < 1) {
- exit_code = 1;
- if (quiet_flag == -1)
- fprintf(stderr, "no input");
- return 0;
- } else if(tag)
- tag_len = strlen(tag);
- else {
- if (quiet_flag == -1)
- fprintf(stderr, "input problem");
- return 0;
- }
- if (strncasecmp (tag, "abort", MAX(tag_len, 5)) == 0) {
- /* require full command name */
- if (argc > 2) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- error_code = slurm_shutdown (1);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("slurm_shutdown error");
- }
- }
- else if (strncasecmp (tag, "all", MAX(tag_len, 2)) == 0)
- all_flag = 1;
- else if (strncasecmp (tag, "completing", MAX(tag_len, 2)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- scontrol_print_completing();
- }
- else if (strncasecmp (tag, "cluster", MAX(tag_len, 2)) == 0) {
- if (clusters) {
- list_destroy(clusters);
- clusters = NULL;
- working_cluster_rec = NULL;
- }
- if (argc >= 2) {
- if (!(clusters = slurmdb_get_info_cluster(argv[1]))) {
- error("'%s' can't be reached now, "
- "or it is an invalid entry for "
- "--cluster. Use 'sacctmgr --list "
- "cluster' to see available clusters.",
- optarg);
- exit(1);
- }
- working_cluster_rec = list_peek(clusters);
- if (list_count(clusters) > 1) {
- fatal("Only one cluster can be used at a time "
- "with scontrol");
- }
- }
- cluster_flags = slurmdb_setup_cluster_flags();
- slurm_free_block_info_msg(old_block_info_ptr);
- old_block_info_ptr = NULL;
- slurm_free_front_end_info_msg(old_front_end_info_ptr);
- old_front_end_info_ptr = NULL;
- slurm_free_job_info_msg(old_job_info_ptr);
- old_job_info_ptr = NULL;
- slurm_free_node_info_msg(old_node_info_ptr);
- old_node_info_ptr = NULL;
- slurm_free_partition_info_msg(old_part_info_ptr);
- old_part_info_ptr = NULL;
- slurm_free_reservation_info_msg(old_res_info_ptr);
- old_res_info_ptr = NULL;
- slurm_free_ctl_conf(old_slurm_ctl_conf_ptr);
- old_slurm_ctl_conf_ptr = NULL;
- /* if(old_block_info_ptr) */
- /* old_block_info_ptr->last_update = 0; */
- /* if(old_job_info_ptr) */
- /* old_job_info_ptr->last_update = 0; */
- /* if(old_node_info_ptr) */
- /* old_node_info_ptr->last_update = 0; */
- /* if(old_part_info_ptr) */
- /* old_part_info_ptr->last_update = 0; */
- /* if(old_res_info_ptr) */
- /* old_res_info_ptr->last_update = 0; */
- /* if(old_slurm_ctl_conf_ptr) */
- /* old_slurm_ctl_conf_ptr->last_update = 0; */
- }
- else if (strncasecmp (tag, "create", MAX(tag_len, 2)) == 0) {
- if (argc < 2) {
- exit_code = 1;
- fprintf (stderr, "too few arguments for %s keyword\n",
- tag);
- return 0;
- }
- _create_it ((argc - 1), &argv[1]);
- }
- else if (strncasecmp (tag, "details", MAX(tag_len, 1)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- return 0;
- }
- detail_flag = 1;
- }
- else if (strncasecmp (tag, "script", MAX(tag_len, 3)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- return 0;
- }
- detail_flag = 2;
- }
- else if (strncasecmp (tag, "exit", MAX(tag_len, 1)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- exit_flag = 1;
- }
- else if (strncasecmp (tag, "help", MAX(tag_len, 2)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- _usage ();
- }
- else if (strncasecmp (tag, "hide", MAX(tag_len, 2)) == 0) {
- all_flag = 0;
- detail_flag = 0;
- }
- else if (strncasecmp (tag, "oneliner", MAX(tag_len, 1)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- one_liner = 1;
- }
- else if (strncasecmp (tag, "pidinfo", MAX(tag_len, 3)) == 0) {
- if (argc > 2) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else if (argc < 2) {
- exit_code = 1;
- fprintf (stderr,
- "missing argument for keyword:%s\n",
- tag);
- } else
- scontrol_pid_info ((pid_t) atol (argv[1]) );
- }
- else if (strncasecmp (tag, "ping", MAX(tag_len, 3)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- _print_ping ();
- }
- else if ((strncasecmp (tag, "\\q", 2) == 0) ||
- (strncasecmp (tag, "quiet", MAX(tag_len, 4)) == 0)) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr, "too many arguments for keyword:%s\n",
- tag);
- }
- quiet_flag = 1;
- }
- else if (strncasecmp (tag, "quit", MAX(tag_len, 4)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- exit_flag = 1;
- }
- else if (strncasecmp (tag, "reboot_nodes", MAX(tag_len, 3)) == 0) {
- if (argc > 2) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else if (argc < 2) {
- error_code = _reboot_nodes("ALL");
- } else
- error_code = _reboot_nodes(argv[1]);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("scontrol_reboot_nodes error");
- }
- }
- else if (strncasecmp (tag, "reconfigure", MAX(tag_len, 3)) == 0) {
- if (argc > 2) {
- exit_code = 1;
- fprintf (stderr, "too many arguments for keyword:%s\n",
- tag);
- }
- error_code = slurm_reconfigure();
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("slurm_reconfigure error");
- }
- }
- else if (strncasecmp (tag, "checkpoint", MAX(tag_len, 2)) == 0) {
- if (argc > 5) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- else if (argc < 3) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- }
- else {
- error_code = scontrol_checkpoint(argv[1], argv[2],
- argc - 3, &argv[3]);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror(
- "scontrol_checkpoint error");
- }
- }
- }
- else if (strncasecmp (tag, "requeue", MAX(tag_len, 3)) == 0) {
- if (argc > 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else {
- error_code = scontrol_requeue(argv[1]);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("slurm_requeue error");
- }
- }
- }
- else if ((strncasecmp (tag, "hold", 4) == 0) ||
- (strncasecmp (tag, "holdu", 5) == 0) ||
- (strncasecmp (tag, "uhold", 5) == 0) ||
- (strncasecmp (tag, "release", MAX(tag_len, 3)) == 0)) {
- if (argc > 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else {
- error_code = scontrol_hold(argv[0], argv[1]);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("slurm_update_job error");
- }
- }
- }
- else if ((strncasecmp (tag, "suspend", MAX(tag_len, 2)) == 0) ||
- (strncasecmp (tag, "resume", MAX(tag_len, 3)) == 0)) {
- if (argc > 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else {
- error_code = scontrol_suspend(argv[0], argv[1]);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("slurm_suspend error");
- }
- }
- }
- else if (strncasecmp (tag, "wait_job", MAX(tag_len, 2)) == 0) {
- if (cluster_flags & CLUSTER_FLAG_CRAYXT) {
- fprintf(stderr,
- "wait_job is handled automatically on Cray.\n");
- } else if (argc > 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else {
- error_code = scontrol_job_ready(argv[1]);
- if (error_code)
- exit_code = 1;
- }
- }
- else if (strncasecmp (tag, "setdebugflags", MAX(tag_len, 9)) == 0) {
- if (argc > 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else {
- int i, mode = 0;
- uint32_t debug_flags_plus = 0;
- uint32_t debug_flags_minus = 0, flags;
- for (i = 1; i < argc; i++) {
- if (argv[i][0] == '+')
- mode = 1;
- else if (argv[i][0] == '-')
- mode = -1;
- else {
- mode = 0;
- break;
- }
- flags = debug_str2flags(&argv[i][1]);
- if (flags == NO_VAL)
- break;
- if (mode == 1)
- debug_flags_plus |= flags;
- else
- debug_flags_minus |= flags;
- }
- if (i < argc) {
- exit_code = 1;
- if (quiet_flag != 1) {
- fprintf(stderr, "invalid debug "
- "flag: %s\n", argv[i]);
- }
- if ((quiet_flag != 1) && (mode = 0)) {
- fprintf(stderr, "Usage: setdebugflags"
- " [+|-]NAME\n");
- }
- } else {
- error_code = slurm_set_debugflags(
- debug_flags_plus, debug_flags_minus);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror(
- "slurm_set_debug_flags"
- " error");
- }
- }
- }
- }
- else if (strncasecmp (tag, "setdebug", MAX(tag_len, 2)) == 0) {
- if (argc > 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else {
- int level = -1;
- char *endptr;
- char *levels[] = {
- "quiet", "fatal", "error", "info", "verbose",
- "debug", "debug2", "debug3", "debug4",
- "debug5", NULL};
- int index = 0;
- while (levels[index]) {
- if (strcasecmp(argv[1], levels[index]) == 0) {
- level = index;
- break;
- }
- index ++;
- }
- if (level == -1) {
- /* effective levels: 0 - 9 */
- level = (int)strtoul (argv[1], &endptr, 10);
- if (*endptr != '\0' || level > 9) {
- level = -1;
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr, "invalid "
- "debug level: %s\n",
- argv[1]);
- }
- }
- if (level != -1) {
- error_code = slurm_set_debug_level(
- level);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror(
- "slurm_set_debug_level "
- "error");
- }
- }
- }
- }
- else if (strncasecmp (tag, "schedloglevel", MAX(tag_len, 3)) == 0) {
- if (argc > 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else {
- int level = -1;
- char *endptr;
- char *levels[] = {
- "disable", "enable", NULL};
- int index = 0;
- while (levels[index]) {
- if (strcasecmp(argv[1], levels[index]) == 0) {
- level = index;
- break;
- }
- index ++;
- }
- if (level == -1) {
- /* effective levels: 0 - 1 */
- level = (int)strtoul (argv[1], &endptr, 10);
- if (*endptr != '\0' || level > 1) {
- level = -1;
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr, "invalid schedlog "
- "level: %s\n", argv[1]);
- }
- }
- if (level != -1) {
- error_code = slurm_set_schedlog_level(
- level);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror(
- "slurm_set_schedlog_level"
- " error");
- }
- }
- }
- }
- else if (strncasecmp (tag, "show", MAX(tag_len, 3)) == 0) {
- _show_it (argc, argv);
- }
- else if (strncasecmp (tag, "takeover", MAX(tag_len, 8)) == 0) {
- char *secondary = NULL;
- slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL;
- slurm_ctl_conf_ptr = slurm_conf_lock();
- secondary = xstrdup(slurm_ctl_conf_ptr->backup_controller);
- slurm_conf_unlock();
- if ( secondary && secondary[0] != '\0' ) {
- error_code = slurm_takeover();
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror("slurm_takeover error");
- }
- } else {
- fprintf(stderr, "slurm_takeover error: no backup "
- "controller defined\n");
- }
- xfree(secondary);
- }
- else if (strncasecmp (tag, "shutdown", MAX(tag_len, 8)) == 0) {
- /* require full command name */
- uint16_t options = 0;
- if (argc == 2) {
- if (strcmp(argv[1], "slurmctld") &&
- strcmp(argv[1], "controller")) {
- error_code = 1;
- exit_code = 1;
- fprintf (stderr,
- "invalid shutdown argument:%s\n",
- argv[1]);
- } else
- options= 2;
- } else if (argc > 2) {
- error_code = 1;
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- }
- if (error_code == 0) {
- error_code = slurm_shutdown(options);
- if (error_code) {
- exit_code = 1;
- if (quiet_flag != 1)
- slurm_perror ("slurm_shutdown error");
- }
- }
- }
- else if (strncasecmp (tag, "update", MAX(tag_len, 1)) == 0) {
- if (argc < 2) {
- exit_code = 1;
- fprintf (stderr, "too few arguments for %s keyword\n",
- tag);
- return 0;
- }
- _update_it ((argc - 1), &argv[1]);
- }
- else if (strncasecmp (tag, "delete", MAX(tag_len, 1)) == 0) {
- if (argc < 2) {
- exit_code = 1;
- fprintf (stderr, "too few arguments for %s keyword\n",
- tag);
- return 0;
- }
- _delete_it ((argc - 1), &argv[1]);
- }
- else if (strncasecmp (tag, "verbose", MAX(tag_len, 4)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for %s keyword\n",
- tag);
- }
- quiet_flag = -1;
- }
- else if (strncasecmp (tag, "version", MAX(tag_len, 4)) == 0) {
- if (argc > 1) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for %s keyword\n",
- tag);
- }
- _print_version();
- }
- else if (strncasecmp (tag, "listpids", MAX(tag_len, 1)) == 0) {
- if (argc > 3) {
- exit_code = 1;
- fprintf (stderr,
- "too many arguments for keyword:%s\n",
- tag);
- } else {
- scontrol_list_pids (argc == 1 ? NULL : argv[1],
- argc <= 2 ? NULL : argv[2]);
- }
- }
- else if (strncasecmp (tag, "notify", MAX(tag_len, 1)) == 0) {
- if (argc < 3) {
- exit_code = 1;
- fprintf (stderr,
- "too few arguments for keyword:%s\n",
- tag);
- } else if (scontrol_job_notify(argc-1, &argv[1])) {
- exit_code = 1;
- slurm_perror("job notify failure");
- }
- }
- else {
- exit_code = 1;
- fprintf (stderr, "invalid keyword: %s\n", tag);
- }
- return 0;
- }
- /*
- * _create_it - create a slurm configuration per the supplied arguments
- * IN argc - count of arguments
- * IN argv - list of arguments
- */
- static void
- _create_it (int argc, char *argv[])
- {
- /* Scan for "res" first, anywhere in the args. When creating
- a reservation there is a partition= option, which we don't
- want to mistake for a requestion to create a partition. */
- int i, error_code = SLURM_SUCCESS;
- for (i=0; i<argc; i++) {
- char *tag = argv[i];
- char *val = strchr(argv[i], '=');
- int tag_len;
- if (val) {
- tag_len = val - argv[i];
- val++;
- } else {
- tag_len = strlen(tag);
- }
- if (!strncasecmp(tag, "ReservationName", MAX(tag_len, 3))) {
- error_code = scontrol_create_res(argc, argv);
- break;
- } else if (!strncasecmp(tag, "PartitionName", MAX(tag_len, 3))) {
- error_code = scontrol_create_part(argc, argv);
- break;
- }
- }
- if (i >= argc) {
- exit_code = 1;
- error("Invalid creation entity: %s", argv[0]);
- } else if (error_code)
- exit_code = 1;
- }
- /*
- * _delete_it - delete the specified slurm entity
- * IN argc - count of arguments
- * IN argv - list of arguments
- */
- static void
- _delete_it (int argc, char *argv[])
- {
- char *tag = NULL, *val = NULL;
- int tag_len = 0;
- if (argc != 1) {
- error("Only one option follows delete. %d given.", argc);
- exit_code = 1;
- return;
- }
- tag = argv[0];
- val = strchr(argv[0], '=');
- if (val) {
- tag_len = val - argv[0];
- val++;
- } else {
- error("Proper format is 'delete Partition=p'"
- " or 'delete Reservation=r'");
- exit_code = 1;
- return;
- }
- /* First identify the entity type to delete */
- if (strncasecmp (tag, "PartitionName", MAX(tag_len, 3)) == 0) {
- delete_part_msg_t part_msg;
- part_msg.name = val;
- if (slurm_delete_partition(&part_msg)) {
- char errmsg[64];
- snprintf(errmsg, 64, "delete_partition %s", argv[0]);
- slurm_perror(errmsg);
- }
- } else if (strncasecmp (tag, "ReservationName", MAX(tag_len, 3)) == 0) {
- reservation_name_msg_t res_msg;
- res_msg.name = val;
- if (slurm_delete_reservation(&res_msg)) {
- char errmsg[64];
- snprintf(errmsg, 64, "delete_reservation %s", argv[0]);
- slurm_perror(errmsg);
- }
- } else if (strncasecmp (tag, "BlockName", MAX(tag_len, 3)) == 0) {
- if(cluster_flags & CLUSTER_FLAG_BG) {
- update_block_msg_t block_msg;
- slurm_init_update_block_msg ( &block_msg );
- block_msg.bg_block_id = val;
- block_msg.state = BG_BLOCK_NAV;
- if (slurm_update_block(&block_msg)) {
- char errmsg[64];
- snprintf(errmsg, 64, "delete_block %s",
- argv[0]);
- slurm_perror(errmsg);
- }
- } else {
- exit_code = 1;
- fprintf(stderr,
- "This only works on a bluegene system.\n");
- }
- } else {
- exit_code = 1;
- fprintf(stderr, "Invalid deletion entity: %s\n", argv[0]);
- }
- }
- /*
- * _show_it - print a description of the specified slurm entity
- * IN argc - count of arguments
- * IN argv - list of arguments
- */
- static void
- _show_it (int argc, char *argv[])
- {
- char *tag = NULL, *val = NULL;
- int tag_len = 0;
- if (argc > 3) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- argv[0]);
- return;
- }
- else if (argc < 2) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too few arguments for keyword:%s\n", argv[0]);
- return;
- }
- tag = argv[1];
- tag_len = strlen(tag);
- val = strchr(argv[1], '=');
- if (val) {
- tag_len = val - argv[1];
- val++;
- } else if (argc == 3) {
- val = argv[2];
- } else {
- val = NULL;
- }
- if (strncasecmp (tag, "aliases", MAX(tag_len, 1)) == 0) {
- if (val)
- _print_aliases (val);
- else
- _print_aliases (NULL);
- } else if (strncasecmp (tag, "blocks", MAX(tag_len, 1)) == 0) {
- scontrol_print_block (val);
- } else if (strncasecmp (tag, "config", MAX(tag_len, 1)) == 0) {
- _print_config (val);
- } else if (strncasecmp (tag, "daemons", MAX(tag_len, 1)) == 0) {
- if (val) {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr,
- "too many arguments for keyword:%s\n",
- argv[0]);
- }
- _print_daemons ();
- } else if (strncasecmp (tag, "FrontendName", MAX(tag_len, 1)) == 0) {
- scontrol_print_front_end_list(val);
- } else if (strncasecmp (tag, "hostnames", MAX(tag_len, 5)) == 0) {
- if (val)
- scontrol_print_hosts(val);
- else
- scontrol_print_hosts(getenv("SLURM_NODELIST"));
- } else if (strncasecmp (tag, "hostlist", MAX(tag_len, 5)) == 0) {
- if (!val) {
- exit_code = 1;
- fprintf(stderr, "invalid encode argument\n");
- _usage();
- } else if (scontrol_encode_hostlist(val))
- exit_code = 1;
- } else if (strncasecmp (tag, "jobs", MAX(tag_len, 1)) == 0 ||
- strncasecmp (tag, "jobid", MAX(tag_len, 1)) == 0 ) {
- scontrol_print_job (val);
- } else if (strncasecmp (tag, "nodes", MAX(tag_len, 1)) == 0) {
- scontrol_print_node_list (val);
- } else if (strncasecmp (tag, "partitions", MAX(tag_len, 1)) == 0 ||
- strncasecmp (tag, "partitionname", MAX(tag_len, 1)) == 0) {
- scontrol_print_part (val);
- } else if (strncasecmp (tag, "reservations", MAX(tag_len, 1)) == 0 ||
- strncasecmp (tag, "reservationname", MAX(tag_len, 1)) == 0) {
- scontrol_print_res (val);
- } else if (strncasecmp (tag, "slurmd", MAX(tag_len, 2)) == 0) {
- _print_slurmd (val);
- } else if (strncasecmp (tag, "steps", MAX(tag_len, 2)) == 0) {
- scontrol_print_step (val);
- } else if (strncasecmp (tag, "topology", MAX(tag_len, 1)) == 0) {
- scontrol_print_topo (val);
- } else {
- exit_code = 1;
- if (quiet_flag != 1)
- fprintf (stderr,
- "invalid entity:%s for keyword:%s \n",
- tag, argv[0]);
- }
- }
- /*
- * _update_it - update the slurm configuration per the supplied arguments
- * IN argc - count of arguments
- * IN argv - list of arguments
- */
- static void
- _update_it (int argc, char *argv[])
- {
- char *val = NULL;
- int i, error_code = SLURM_SUCCESS;
- int node_tag = 0, part_tag = 0, job_tag = 0;
- int block_tag = 0, sub_tag = 0, res_tag = 0;
- int debug_tag = 0, step_tag = 0, front_end_tag = 0;
- /* First identify the entity to update */
- for (i=0; i<argc; i++) {
- char *tag = argv[i];
- int tag_len = 0;
- val = strchr(argv[i], '=');
- if (!val)
- continue;
- tag_len = val - argv[i];
- val++;
- if (!strncasecmp(tag, "NodeName", MAX(tag_len, 3))) {
- node_tag = 1;
- } else if (!strncasecmp(tag, "PartitionName",
- MAX(tag_len, 3))) {
- part_tag = 1;
- } else if (!strncasecmp(tag, "JobId", MAX(tag_len, 3))) {
- job_tag = 1;
- } else if (!strncasecmp(tag, "StepId", MAX(tag_len, 4))) {
- step_tag = 1;
- } else if (!strncasecmp(tag, "BlockName", MAX(tag_len, 3))) {
- block_tag = 1;
- } else if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 3))
- || !strncasecmp(tag, "SubMPName", MAX(tag_len, 3))) {
- sub_tag = 1;
- } else if (!strncasecmp(tag, "FrontendName",
- MAX(tag_len, 2))) {
- front_end_tag = 1;
- } else if (!strncasecmp(tag, "ReservationName",
- MAX(tag_len, 3))) {
- res_tag = 1;
- } else if (!strncasecmp(tag, "SlurmctldDebug",
- MAX(tag_len, 2))) {
- debug_tag= 1;
- }
- }
- /* The order of tests matters here. An update job request can include
- * partition and reservation tags, possibly before the jobid tag, but
- * none of the other updates have a jobid tag, so check jobtag first.
- * Likewise, check restag next, because reservations can have a
- * partition tag. The order of the rest doesn't matter because there
- * aren't any other duplicate tags. */
- if (job_tag)
- error_code = scontrol_update_job (argc, argv);
- else if (step_tag)
- error_code = scontrol_update_step (argc, argv);
- else if (res_tag)
- error_code = scontrol_update_res (argc, argv);
- else if (node_tag)
- error_code = scontrol_update_node (argc, argv);
- else if (front_end_tag)
- error_code = scontrol_update_front_end (argc, argv);
- else if (part_tag)
- error_code = scontrol_update_part (argc, argv);
- else if (block_tag)
- error_code = _update_bluegene_block (argc, argv);
- else if (sub_tag)
- error_code = _update_bluegene_subbp (argc, argv);
- else if (debug_tag)
- error_code = _update_slurmctld_debug(val);
- else {
- exit_code = 1;
- fprintf(stderr, "No valid entity in update command\n");
- fprintf(stderr, "Input line must include \"NodeName\", ");
- if(cluster_flags & CLUSTER_FLAG_BG) {
- fprintf(stderr, "\"BlockName\", \"SubMPName\" "
- "(i.e. bgl000[0-3]),");
- }
- fprintf(stderr, "\"PartitionName\", \"Reservation\", "
- "\"JobId\", or \"SlurmctldDebug\" \n");
- }
- if (error_code) {
- exit_code = 1;
- slurm_perror ("slurm_update error");
- }
- }
- /*
- * _update_bluegene_block - update the bluegene block per the
- * supplied arguments
- * IN argc - count of arguments
- * IN argv - list of arguments
- * RET 0 if no slurm error, errno otherwise. parsing error prints
- * error message and returns 0
- */
- static int
- _update_bluegene_block (int argc, char *argv[])
- {
- int i, update_cnt = 0;
- update_block_msg_t block_msg;
- if(!(cluster_flags & CLUSTER_FLAG_BG)) {
- exit_code = 1;
- fprintf(stderr, "This only works on a bluegene system.\n");
- return 0;
- }
- slurm_init_update_block_msg ( &block_msg );
- for (i=0; i<argc; i++) {
- char *tag = argv[i];
- char *val = strchr(argv[i], '=');
- int tag_len = 0, vallen = 0;
- if (val) {
- tag_len = val - argv[i];
- val++;
- vallen = strlen(val);
- } else {
- exit_code = 1;
- error("Invalid input for BlueGene block "
- "update %s",
- argv[i]);
- return 0;
- }
- if (!strncasecmp(tag, "BlockName", MAX(tag_len, 2))) {
- block_msg.bg_block_id = val;
- } else if (!strncasecmp(tag, "State", MAX(tag_len, 2))) {
- if (!strncasecmp(val, "ERROR", MAX(vallen, 1)))
- block_msg.state = BG_BLOCK_ERROR_FLAG;
- else if (!strncasecmp(val, "FREE", MAX(vallen, 1)))
- block_msg.state = BG_BLOCK_FREE;
- else if (!strncasecmp(val, "RECREATE", MAX(vallen, 3)))
- block_msg.state = BG_BLOCK_BOOTING;
- else if (!strncasecmp(val, "REMOVE", MAX(vallen, 3)))
- block_msg.state = BG_BLOCK_NAV;
- else if (!strncasecmp(val, "RESUME", MAX(vallen, 3)))
- block_msg.state = BG_BLOCK_TERM;
- else {
- exit_code = 1;
- fprintf (stderr, "Invalid input: %s\n",
- argv[i]);
- fprintf (stderr,
- "Acceptable State values "
- "are ERROR, FREE, RECREATE, "
- "REMOVE, RESUME\n");
- return 0;
- }
- update_cnt++;
- } else {
- exit_code = 1;
- error("Invalid input for BlueGene block update %s",
- argv[i]);
- return 0;
- }
- }
- if(!block_msg.bg_block_id) {
- error("You didn't supply a block name.");
- return 0;
- } else if (block_msg.state == (uint16_t)NO_VAL) {
- error("You didn't give me a state to set %s to "
- "(i.e. FREE, ERROR).", block_msg.mp_str);
- return 0;
- }
- if (slurm_update_block(&block_msg)) {
- exit_code = 1;
- return slurm_get_errno ();
- } else
- return 0;
- }
- /*
- * _update_bluegene_subbp - update the bluegene nodecards per the
- * supplied arguments
- * IN argc - count of arguments
- * IN argv - list of arguments
- * RET 0 if no slurm error, errno otherwise. parsing error prints
- * error message and returns 0
- */
- static int
- _update_bluegene_subbp (int argc, char *argv[])
- {
- int i, update_cnt = 0;
- update_block_msg_t block_msg;
- if(!(cluster_flags & CLUSTER_FLAG_BG)) {
- exit_code = 1;
- fprintf(stderr, "This only works on a bluegene system.\n");
- return 0;
- }
- slurm_init_update_block_msg ( &block_msg );
- for (i=0; i<argc; i++) {
- char *tag = argv[i];
- char *val = strchr(argv[i], '=');
- int tag_len = 0, vallen = 0;
- if (val) {
- tag_len = val - argv[i];
- val++;
- vallen = strlen(val);
- } else {
- exit_code = 1;
- error("Invalid input for BlueGene SubMPName update %s",
- argv[i]);
- return 0;
- }
- if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 2))
- || !strncasecmp(tag, "SubMPName", MAX(tag_len, 2)))
- block_msg.mp_str = val;
- else if (!strncasecmp(tag, "State", MAX(tag_len, 2))) {
- if (!strncasecmp(val, "ERROR", MAX(vallen, 1)))
- block_msg.state = BG_BLOCK_ERROR_FLAG;
- else if (!strncasecmp(val, "FREE", MAX(vallen, 1)))
- block_msg.state = BG_BLOCK_FREE;
- else {
- exit_code = 1;
- fprintf (stderr, "Invalid input: %s\n",
- argv[i]);
- fprintf (stderr, "Acceptable State values "
- "are FREE and ERROR\n");
- return 0;
- }
- update_cnt++;
- } else {
- exit_code = 1;
- error("Invalid input for BlueGene SubMPName update %s",
- argv[i]);
- return 0;
- }
- }
- if(!block_msg.mp_str) {
- error("You didn't supply an ionode list.");
- return 0;
- } else if (block_msg.state == (uint16_t)NO_VAL) {
- error("You didn't give me a state to set %s to "
- "(i.e. FREE, ERROR).", block_msg.mp_str);
- return 0;
- }
- if (slurm_update_block(&block_msg)) {
- exit_code = 1;
- return slurm_get_errno ();
- } else
- return 0;
- }
- /*
- * _update_slurmctld_debug - update the slurmctld debug level
- * IN val - new value
- * RET 0 if no slurm error, errno otherwise. parsing error prints
- * error message and returns 0
- */
- static int _update_slurmctld_debug(char *val)
- {
- char *endptr;
- int error_code = SLURM_SUCCESS;
- uint32_t level = (uint32_t)strtoul(val, &endptr, 10);
- if (*endptr != '\0' || level > 9) {
- error_code = 1;
- if (quiet_flag != 1)
- fprintf(stderr, "invalid debug level: %s\n",
- val);
- } else {
- error_code = slurm_set_debug_level(level);
- }
- return error_code;
- }
- /* _usage - show the valid scontrol commands */
- void
- _usage () {
- printf ("\
- scontrol [<OPTION>] [<COMMAND>] \n\
- Valid <OPTION> values are: \n\
- -a or --all: equivalent to \"all\" command \n\
- -d or --details: equivalent to \"details\" command \n\
- -h or --help: equivalent to \"help\" command \n\
- --hide: equivalent to \"hide\" command \n\
- -M or --cluster: equivalent to \"cluster\" command \n\
- -o or --oneliner: equivalent to \"oneliner\" command \n\
- -Q or --quiet: equivalent to \"quiet\" command \n\
- -v or --verbose: equivalent to \"verbose\" command \n\
- -V or --version: equivalent to \"version\" command \n\
- \n\
- <keyword> may be omitted from the execute line and scontrol will execute \n\
- in interactive mode. It will process commands as entered until explicitly\n\
- terminated. \n\
- \n\
- Valid <COMMAND> values are: \n\
- abort shutdown slurm controller immediately \n\
- generating a core file. \n\
- all display information about all partitions, \n\
- including hidden partitions. \n\
- cluster cluster to issue commands to. Default is \n\
- current cluster. cluster with no name will \n\
- reset to default. \n\
- checkpoint <CH_OP><ID> perform a checkpoint operation on identified \n\
- job or job step \n\
- completing display jobs in completing state along with \n\
- their completing or down nodes \n\
- create <SPECIFICATIONS> create a new partition or reservation \n\
- details evokes additional details from the \"show\" \n\
- command \n\
- delete <SPECIFICATIONS> delete the specified partition or reservation\n\
- On Dynamic layout Bluegene systems you can also\n\
- delete blocks. \n\
- exit terminate scontrol \n\
- help print this description of use. \n\
- hold <job_id> prevent specified job from starting (see release)\n\
- holdu <job_id> place user hold on specified job (see release)\n\
- hide do not display information about hidden \n\
- partitions \n\
- listpids <job_id<.step>> List pids associated with the given jobid, or\n\
- all jobs if no id is given (This will only \n\
- display the processes on the node which the \n\
- scontrol is ran on, and only for those \n\
- processes spawned by SLURM and their \n\
- descendants) \n\
- notify <job_id> msg send message to specified job \n\
- oneliner report output one record per line. \n\
- pidinfo <pid> return slurm job information for given pid. \n\
- ping print status of slurmctld daemons. \n\
- quiet print no messages other than error messages. \n\
- quit terminate this command. \n\
- reboot_nodes [<nodelist>] reboot the nodes when they become idle. \n\
- By default all nodes are rebooted. \n\
- reconfigure re-read configuration files. \n\
- release <job_id> permit specified job to start (see hold) \n\
- requeue <job_id> re-queue a batch job \n\
- resume <job_id> resume previously suspended job (see suspend)\n\
- setdebug <level> set slurmctld debug level \n\
- setdebugflags [+|-]<flag> add or remove slurmctld DebugFlags \n\
- schedloglevel <slevel> set scheduler log level \n\
- show <ENTITY> [<ID>] display state of identified entity, default \n\
- is all records. \n\
- shutdown <OPTS> shutdown slurm daemons \n\
- (the primary controller will be stopped) \n\
- suspend <job_id> susend specified job (see resume) \n\
- takeover ask slurm backup controller to take over \n\
- uhold <job_id> place user hold on specified job (see release)\n\
- update <SPECIFICATIONS> update job, node, partition, reservation, \n\
- step or bluegene block/subbp configuration \n\
- verbose enable detailed logging. \n\
- version display tool version number. \n\
- wait_job <job_id> wait until the nodes allocated to the job \n\
- are booted and usable \n\
- !! Repeat the last command entered. \n\
- \n\
- <ENTITY> may be \"aliases\", \"config\", \"daemons\", \"frontend\", \n\
- \"hostlist\", \"hostnames\", \"job\", \"node\", \"partition\", \n\
- \"reservation\", \"slurmd\", \"step\", or \"topology\" \n\
- (also for BlueGene only: \"block\" or \"subbp\"). \n\
- \n\
- <ID> may be a configuration parameter name, job id, node name, partition \n\
- name, reservation name, job step id, or hostlist or pathname to a \n\
- …
Large files files are truncated, but you can click here to view the full file