PageRenderTime 30ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/src/scontrol/scontrol.c

https://github.com/cfenoy/slurm
C | 1841 lines | 1582 code | 114 blank | 145 comment | 590 complexity | be3550085834976c42243828dca8ab2a MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * scontrol.c - administration tool for slurm.
  3. * provides an interface to read, write, and update configurations.
  4. *****************************************************************************
  5. * Copyright (C) 2002-2007 The Regents of the University of California.
  6. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  7. * Portions Copyright (C) 2008 Vijay Ramasubramanian.
  8. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  9. * Written by Morris Jette <jette1@llnl.gov>
  10. * CODE-OCEC-09-009. All rights reserved.
  11. *
  12. * This file is part of SLURM, a resource management program.
  13. * For details, see <http://www.schedmd.com/slurmdocs/>.
  14. * Please also read the included file: DISCLAIMER.
  15. *
  16. * SLURM is free software; you can redistribute it and/or modify it under
  17. * the terms of the GNU General Public License as published by the Free
  18. * Software Foundation; either version 2 of the License, or (at your option)
  19. * any later version.
  20. *
  21. * In addition, as a special exception, the copyright holders give permission
  22. * to link the code of portions of this program with the OpenSSL library under
  23. * certain conditions as described in each individual source file, and
  24. * distribute linked combinations including the two. You must obey the GNU
  25. * General Public License in all respects for all of the code used other than
  26. * OpenSSL. If you modify file(s) with this exception, you may extend this
  27. * exception to your version of the file(s), but you are not obligated to do
  28. * so. If you do not wish to do so, delete this exception statement from your
  29. * version. If you delete this exception statement from all source files in
  30. * the program, then also delete it here.
  31. *
  32. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  33. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  34. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  35. * details.
  36. *
  37. * You should have received a copy of the GNU General Public License along
  38. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  39. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  40. \*****************************************************************************/
  41. #include "scontrol.h"
  42. #include "src/plugins/select/bluegene/bg_enums.h"
  43. #include "src/common/proc_args.h"
  44. #define OPT_LONG_HIDE 0x102
  45. char *command_name;
  46. List clusters = NULL;
  47. int all_flag; /* display even hidden partitions */
  48. int detail_flag; /* display additional details */
  49. int exit_code; /* scontrol's exit code, =1 on any error at any time */
  50. int exit_flag; /* program to terminate if =1 */
  51. int input_words; /* number of words of input permitted */
  52. int one_liner; /* one record per line if =1 */
  53. int quiet_flag; /* quiet=1, verbose=-1, normal=0 */
  54. int verbosity; /* count of "-v" options */
  55. uint32_t cluster_flags; /* what type of cluster are we talking to */
  56. block_info_msg_t *old_block_info_ptr = NULL;
  57. front_end_info_msg_t *old_front_end_info_ptr = NULL;
  58. job_info_msg_t *old_job_info_ptr = NULL;
  59. node_info_msg_t *old_node_info_ptr = NULL;
  60. partition_info_msg_t *old_part_info_ptr = NULL;
  61. reserve_info_msg_t *old_res_info_ptr = NULL;
  62. slurm_ctl_conf_info_msg_t *old_slurm_ctl_conf_ptr = NULL;
  63. static void _create_it (int argc, char *argv[]);
  64. static void _delete_it (int argc, char *argv[]);
  65. static void _show_it (int argc, char *argv[]);
  66. static int _get_command (int *argc, char *argv[]);
  67. static void _ping_slurmctld(char *control_machine,
  68. char *backup_controller);
  69. static void _print_config (char *config_param);
  70. static void _print_daemons (void);
  71. static void _print_aliases (char* node_hostname);
  72. static void _print_ping (void);
  73. static void _print_slurmd(char *hostlist);
  74. static void _print_version( void );
  75. static int _process_command (int argc, char *argv[]);
  76. static void _update_it (int argc, char *argv[]);
  77. static int _update_bluegene_block (int argc, char *argv[]);
  78. static int _update_bluegene_subbp (int argc, char *argv[]);
  79. static int _update_slurmctld_debug(char *val);
  80. static void _usage ();
  81. int
  82. main (int argc, char *argv[])
  83. {
  84. int error_code = SLURM_SUCCESS, i, opt_char, input_field_count;
  85. char **input_fields, *env_val;
  86. log_options_t opts = LOG_OPTS_STDERR_ONLY ;
  87. int option_index;
  88. static struct option long_options[] = {
  89. {"all", 0, 0, 'a'},
  90. {"cluster", 1, 0, 'M'},
  91. {"clusters", 1, 0, 'M'},
  92. {"details", 0, 0, 'd'},
  93. {"help", 0, 0, 'h'},
  94. {"hide", 0, 0, OPT_LONG_HIDE},
  95. {"oneliner", 0, 0, 'o'},
  96. {"quiet", 0, 0, 'Q'},
  97. {"usage", 0, 0, 'h'},
  98. {"verbose", 0, 0, 'v'},
  99. {"version", 0, 0, 'V'},
  100. {NULL, 0, 0, 0}
  101. };
  102. command_name = argv[0];
  103. all_flag = 0;
  104. detail_flag = 0;
  105. exit_code = 0;
  106. exit_flag = 0;
  107. input_field_count = 0;
  108. quiet_flag = 0;
  109. verbosity = 0;
  110. log_init("scontrol", opts, SYSLOG_FACILITY_DAEMON, NULL);
  111. if (getenv ("SCONTROL_ALL"))
  112. all_flag= 1;
  113. if ((env_val = getenv("SLURM_CLUSTERS"))) {
  114. if (!(clusters = slurmdb_get_info_cluster(env_val))) {
  115. error("'%s' can't be reached now, "
  116. "or it is an invalid entry for "
  117. "SLURM_CLUSTERS. Use 'sacctmgr --list "
  118. "cluster' to see available clusters.",
  119. env_val);
  120. exit(1);
  121. }
  122. working_cluster_rec = list_peek(clusters);
  123. }
  124. while((opt_char = getopt_long(argc, argv, "adhM:oQvV",
  125. long_options, &option_index)) != -1) {
  126. switch (opt_char) {
  127. case (int)'?':
  128. fprintf(stderr, "Try \"scontrol --help\" for "
  129. "more information\n");
  130. exit(1);
  131. break;
  132. case (int)'a':
  133. all_flag = 1;
  134. break;
  135. case (int)'d':
  136. detail_flag++;
  137. break;
  138. case (int)'h':
  139. _usage ();
  140. exit(exit_code);
  141. break;
  142. case OPT_LONG_HIDE:
  143. all_flag = 0;
  144. detail_flag = 0;
  145. break;
  146. case (int)'M':
  147. if (clusters) {
  148. list_destroy(clusters);
  149. clusters = NULL;
  150. working_cluster_rec = NULL;
  151. }
  152. if (!(clusters = slurmdb_get_info_cluster(optarg))) {
  153. error("'%s' can't be reached now, "
  154. "or it is an invalid entry for "
  155. "--cluster. Use 'sacctmgr --list "
  156. "cluster' to see available clusters.",
  157. optarg);
  158. exit(1);
  159. }
  160. working_cluster_rec = list_peek(clusters);
  161. break;
  162. case (int)'o':
  163. one_liner = 1;
  164. break;
  165. case (int)'Q':
  166. quiet_flag = 1;
  167. break;
  168. case (int)'v':
  169. quiet_flag = -1;
  170. verbosity++;
  171. break;
  172. case (int)'V':
  173. _print_version();
  174. exit(exit_code);
  175. break;
  176. default:
  177. exit_code = 1;
  178. fprintf(stderr, "getopt error, returned %c\n",
  179. opt_char);
  180. exit(exit_code);
  181. }
  182. }
  183. if (clusters && (list_count(clusters) > 1))
  184. fatal("Only one cluster can be used at a time with scontrol");
  185. cluster_flags = slurmdb_setup_cluster_flags();
  186. if (verbosity) {
  187. opts.stderr_level += verbosity;
  188. log_alter(opts, SYSLOG_FACILITY_USER, NULL);
  189. }
  190. if (argc > MAX_INPUT_FIELDS) /* bogus input, but continue anyway */
  191. input_words = argc;
  192. else
  193. input_words = 128;
  194. input_fields = (char **) xmalloc (sizeof (char *) * input_words);
  195. if (optind < argc) {
  196. for (i = optind; i < argc; i++) {
  197. input_fields[input_field_count++] = argv[i];
  198. }
  199. }
  200. if (input_field_count)
  201. exit_flag = 1;
  202. else
  203. error_code = _get_command (&input_field_count, input_fields);
  204. while (error_code == SLURM_SUCCESS) {
  205. error_code = _process_command (input_field_count,
  206. input_fields);
  207. if (error_code || exit_flag)
  208. break;
  209. error_code = _get_command (&input_field_count, input_fields);
  210. }
  211. if (clusters)
  212. list_destroy(clusters);
  213. exit(exit_code);
  214. }
  215. static void _print_version(void)
  216. {
  217. print_slurm_version();
  218. if (quiet_flag == -1) {
  219. long version = slurm_api_version();
  220. printf("slurm_api_version: %ld, %ld.%ld.%ld\n", version,
  221. SLURM_VERSION_MAJOR(version),
  222. SLURM_VERSION_MINOR(version),
  223. SLURM_VERSION_MICRO(version));
  224. }
  225. }
  226. #if !HAVE_READLINE
  227. /*
  228. * Alternative to readline if readline is not available
  229. */
  230. static char *_getline(const char *prompt)
  231. {
  232. char buf[4096];
  233. char *line;
  234. int len;
  235. printf("%s", prompt);
  236. /* we only set this here to avoid a warning. We throw it away
  237. later. */
  238. line = fgets(buf, 4096, stdin);
  239. len = strlen(buf);
  240. if ((len > 0) && (buf[len-1] == '\n'))
  241. buf[len-1] = '\0';
  242. else
  243. len++;
  244. line = malloc (len * sizeof(char));
  245. return strncpy(line, buf, len);
  246. }
  247. #endif
  248. /*
  249. * _get_command - get a command from the user
  250. * OUT argc - location to store count of arguments
  251. * OUT argv - location to store the argument list
  252. */
  253. static int
  254. _get_command (int *argc, char **argv)
  255. {
  256. char *in_line;
  257. static char *last_in_line = NULL;
  258. int i, in_line_size;
  259. static int last_in_line_size = 0;
  260. *argc = 0;
  261. #if HAVE_READLINE
  262. in_line = readline ("scontrol: ");
  263. #else
  264. in_line = _getline("scontrol: ");
  265. #endif
  266. if (in_line == NULL)
  267. return 0;
  268. else if (strcmp (in_line, "!!") == 0) {
  269. free (in_line);
  270. in_line = last_in_line;
  271. in_line_size = last_in_line_size;
  272. } else {
  273. if (last_in_line)
  274. free (last_in_line);
  275. last_in_line = in_line;
  276. last_in_line_size = in_line_size = strlen (in_line);
  277. }
  278. #if HAVE_READLINE
  279. add_history(in_line);
  280. #endif
  281. /* break in_line into tokens */
  282. for (i = 0; i < in_line_size; i++) {
  283. bool double_quote = false, single_quote = false;
  284. if (in_line[i] == '\0')
  285. break;
  286. if (isspace ((int) in_line[i]))
  287. continue;
  288. if (((*argc) + 1) > MAX_INPUT_FIELDS) { /* bogus input line */
  289. exit_code = 1;
  290. fprintf (stderr,
  291. "%s: can not process over %d words\n",
  292. command_name, input_words);
  293. return E2BIG;
  294. }
  295. argv[(*argc)++] = &in_line[i];
  296. for (i++; i < in_line_size; i++) {
  297. if (in_line[i] == '\042') {
  298. double_quote = !double_quote;
  299. continue;
  300. }
  301. if (in_line[i] == '\047') {
  302. single_quote = !single_quote;
  303. continue;
  304. }
  305. if (in_line[i] == '\0')
  306. break;
  307. if (double_quote || single_quote)
  308. continue;
  309. if (isspace ((int) in_line[i])) {
  310. in_line[i] = '\0';
  311. break;
  312. }
  313. }
  314. }
  315. return 0;
  316. }
  317. /*
  318. * _print_config - print the specified configuration parameter and value
  319. * IN config_param - NULL to print all parameters and values
  320. */
  321. static void
  322. _print_config (char *config_param)
  323. {
  324. int error_code;
  325. slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL;
  326. if (old_slurm_ctl_conf_ptr) {
  327. error_code = slurm_load_ctl_conf (
  328. old_slurm_ctl_conf_ptr->last_update,
  329. &slurm_ctl_conf_ptr);
  330. if (error_code == SLURM_SUCCESS)
  331. slurm_free_ctl_conf(old_slurm_ctl_conf_ptr);
  332. else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
  333. slurm_ctl_conf_ptr = old_slurm_ctl_conf_ptr;
  334. error_code = SLURM_SUCCESS;
  335. if (quiet_flag == -1) {
  336. printf ("slurm_load_ctl_conf no change "
  337. "in data\n");
  338. }
  339. }
  340. }
  341. else
  342. error_code = slurm_load_ctl_conf ((time_t) NULL,
  343. &slurm_ctl_conf_ptr);
  344. if (error_code) {
  345. exit_code = 1;
  346. if (quiet_flag != 1)
  347. slurm_perror ("slurm_load_ctl_conf error");
  348. }
  349. else
  350. old_slurm_ctl_conf_ptr = slurm_ctl_conf_ptr;
  351. if (error_code == SLURM_SUCCESS) {
  352. slurm_print_ctl_conf (stdout, slurm_ctl_conf_ptr) ;
  353. fprintf(stdout, "\n");
  354. }
  355. if (slurm_ctl_conf_ptr)
  356. _ping_slurmctld (slurm_ctl_conf_ptr->control_machine,
  357. slurm_ctl_conf_ptr->backup_controller);
  358. }
  359. /* Print slurmd status on localhost.
  360. * Parse hostlist in the future */
  361. static void _print_slurmd(char *hostlist)
  362. {
  363. slurmd_status_t *slurmd_status;
  364. if (slurm_load_slurmd_status(&slurmd_status)) {
  365. exit_code = 1;
  366. if (quiet_flag != 1)
  367. slurm_perror("slurm_load_slurmd_status");
  368. } else {
  369. slurm_print_slurmd_status(stdout, slurmd_status);
  370. slurm_free_slurmd_status(slurmd_status);
  371. }
  372. }
  373. /* Print state of controllers only */
  374. static void
  375. _print_ping (void)
  376. {
  377. slurm_ctl_conf_info_msg_t *conf;
  378. char *primary, *secondary;
  379. slurm_conf_init(NULL);
  380. conf = slurm_conf_lock();
  381. primary = xstrdup(conf->control_machine);
  382. secondary = xstrdup(conf->backup_controller);
  383. slurm_conf_unlock();
  384. _ping_slurmctld (primary, secondary);
  385. xfree(primary);
  386. xfree(secondary);
  387. }
  388. /* Report if slurmctld daemons are responding */
  389. static void
  390. _ping_slurmctld(char *control_machine, char *backup_controller)
  391. {
  392. static char *state[2] = { "UP", "DOWN" };
  393. int primary = 1, secondary = 1;
  394. int down_msg = 0;
  395. if (slurm_ping(1) == SLURM_SUCCESS)
  396. primary = 0;
  397. if (slurm_ping(2) == SLURM_SUCCESS)
  398. secondary = 0;
  399. fprintf(stdout, "Slurmctld(primary/backup) ");
  400. if (control_machine || backup_controller) {
  401. fprintf(stdout, "at ");
  402. if (control_machine) {
  403. fprintf(stdout, "%s/", control_machine);
  404. if (primary)
  405. down_msg = 1;
  406. } else
  407. fprintf(stdout, "(NULL)/");
  408. if (backup_controller) {
  409. fprintf(stdout, "%s ", backup_controller);
  410. if (secondary)
  411. down_msg = 1;
  412. } else
  413. fprintf(stdout, "(NULL) ");
  414. }
  415. fprintf(stdout, "are %s/%s\n",
  416. state[primary], state[secondary]);
  417. if (down_msg && (getuid() == 0)) {
  418. fprintf(stdout, "*****************************************\n");
  419. fprintf(stdout, "** RESTORE SLURMCTLD DAEMON TO SERVICE **\n");
  420. fprintf(stdout, "*****************************************\n");
  421. }
  422. }
  423. /*
  424. * _print_daemons - report what daemons should be running on this node
  425. */
  426. static void
  427. _print_daemons (void)
  428. {
  429. slurm_ctl_conf_info_msg_t *conf;
  430. char me[MAX_SLURM_NAME], *b, *c, *n, *token, *save_ptr = NULL;
  431. int actld = 0, ctld = 0, d = 0;
  432. char daemon_list[] = "slurmctld slurmd";
  433. slurm_conf_init(NULL);
  434. conf = slurm_conf_lock();
  435. gethostname_short(me, MAX_SLURM_NAME);
  436. if ((b = conf->backup_controller)) {
  437. if ((strcmp(b, me) == 0) ||
  438. (strcasecmp(b, "localhost") == 0))
  439. ctld = 1;
  440. }
  441. if (conf->control_machine) {
  442. actld = 1;
  443. c = xstrdup(conf->control_machine);
  444. token = strtok_r(c, ",", &save_ptr);
  445. while (token) {
  446. if ((strcmp(token, me) == 0) ||
  447. (strcasecmp(token, "localhost") == 0)) {
  448. ctld = 1;
  449. break;
  450. }
  451. token = strtok_r(NULL, ",", &save_ptr);
  452. }
  453. xfree(c);
  454. }
  455. slurm_conf_unlock();
  456. if ((n = slurm_conf_get_nodename(me))) {
  457. d = 1;
  458. xfree(n);
  459. } else if ((n = slurm_conf_get_aliased_nodename())) {
  460. d = 1;
  461. xfree(n);
  462. } else if ((n = slurm_conf_get_nodename("localhost"))) {
  463. d = 1;
  464. xfree(n);
  465. }
  466. strcpy(daemon_list, "");
  467. if (actld && ctld)
  468. strcat(daemon_list, "slurmctld ");
  469. if (actld && d)
  470. strcat(daemon_list, "slurmd");
  471. fprintf (stdout, "%s\n", daemon_list) ;
  472. }
  473. /*
  474. * _print_aliases - report which aliases should be running on this node
  475. */
  476. static void
  477. _print_aliases (char* node_hostname)
  478. {
  479. char me[MAX_SLURM_NAME], *n = NULL, *a = NULL;
  480. char *s;
  481. slurm_conf_init(NULL);
  482. if (!node_hostname) {
  483. gethostname_short(me, MAX_SLURM_NAME);
  484. s = me;
  485. } else
  486. s = node_hostname;
  487. if (!(n = slurm_conf_get_aliases(s)) && (s == me)) {
  488. if (!(a = slurm_conf_get_aliased_nodename()))
  489. a = slurm_conf_get_nodename("localhost");
  490. if (a) {
  491. n = slurm_conf_get_aliases(a);
  492. xfree(a);
  493. }
  494. }
  495. if (n) {
  496. fprintf(stdout, "%s\n", n);
  497. xfree(n);
  498. }
  499. }
  500. /*
  501. * _reboot_nodes - issue RPC to have computing nodes reboot when idle
  502. * RET 0 or a slurm error code
  503. */
  504. static int _reboot_nodes(char *node_list)
  505. {
  506. slurm_ctl_conf_t *conf;
  507. int rc;
  508. slurm_msg_t msg;
  509. reboot_msg_t req;
  510. conf = slurm_conf_lock();
  511. if (conf->reboot_program == NULL) {
  512. error("RebootProgram isn't defined");
  513. slurm_conf_unlock();
  514. slurm_seterrno(SLURM_ERROR);
  515. return SLURM_ERROR;
  516. }
  517. slurm_conf_unlock();
  518. slurm_msg_t_init(&msg);
  519. req.node_list = node_list;
  520. msg.msg_type = REQUEST_REBOOT_NODES;
  521. msg.data = &req;
  522. if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0)
  523. return SLURM_ERROR;
  524. if (rc)
  525. slurm_seterrno_ret(rc);
  526. return rc;
  527. }
  528. /*
  529. * _process_command - process the user's command
  530. * IN argc - count of arguments
  531. * IN argv - the arguments
  532. * RET 0 or errno (only for errors fatal to scontrol)
  533. */
  534. static int
  535. _process_command (int argc, char *argv[])
  536. {
  537. int error_code = 0;
  538. char *tag = argv[0];
  539. int tag_len = 0;
  540. if (argc < 1) {
  541. exit_code = 1;
  542. if (quiet_flag == -1)
  543. fprintf(stderr, "no input");
  544. return 0;
  545. } else if(tag)
  546. tag_len = strlen(tag);
  547. else {
  548. if (quiet_flag == -1)
  549. fprintf(stderr, "input problem");
  550. return 0;
  551. }
  552. if (strncasecmp (tag, "abort", MAX(tag_len, 5)) == 0) {
  553. /* require full command name */
  554. if (argc > 2) {
  555. exit_code = 1;
  556. fprintf (stderr,
  557. "too many arguments for keyword:%s\n",
  558. tag);
  559. }
  560. error_code = slurm_shutdown (1);
  561. if (error_code) {
  562. exit_code = 1;
  563. if (quiet_flag != 1)
  564. slurm_perror ("slurm_shutdown error");
  565. }
  566. }
  567. else if (strncasecmp (tag, "all", MAX(tag_len, 2)) == 0)
  568. all_flag = 1;
  569. else if (strncasecmp (tag, "completing", MAX(tag_len, 2)) == 0) {
  570. if (argc > 1) {
  571. exit_code = 1;
  572. fprintf (stderr,
  573. "too many arguments for keyword:%s\n",
  574. tag);
  575. }
  576. scontrol_print_completing();
  577. }
  578. else if (strncasecmp (tag, "cluster", MAX(tag_len, 2)) == 0) {
  579. if (clusters) {
  580. list_destroy(clusters);
  581. clusters = NULL;
  582. working_cluster_rec = NULL;
  583. }
  584. if (argc >= 2) {
  585. if (!(clusters = slurmdb_get_info_cluster(argv[1]))) {
  586. error("'%s' can't be reached now, "
  587. "or it is an invalid entry for "
  588. "--cluster. Use 'sacctmgr --list "
  589. "cluster' to see available clusters.",
  590. optarg);
  591. exit(1);
  592. }
  593. working_cluster_rec = list_peek(clusters);
  594. if (list_count(clusters) > 1) {
  595. fatal("Only one cluster can be used at a time "
  596. "with scontrol");
  597. }
  598. }
  599. cluster_flags = slurmdb_setup_cluster_flags();
  600. slurm_free_block_info_msg(old_block_info_ptr);
  601. old_block_info_ptr = NULL;
  602. slurm_free_front_end_info_msg(old_front_end_info_ptr);
  603. old_front_end_info_ptr = NULL;
  604. slurm_free_job_info_msg(old_job_info_ptr);
  605. old_job_info_ptr = NULL;
  606. slurm_free_node_info_msg(old_node_info_ptr);
  607. old_node_info_ptr = NULL;
  608. slurm_free_partition_info_msg(old_part_info_ptr);
  609. old_part_info_ptr = NULL;
  610. slurm_free_reservation_info_msg(old_res_info_ptr);
  611. old_res_info_ptr = NULL;
  612. slurm_free_ctl_conf(old_slurm_ctl_conf_ptr);
  613. old_slurm_ctl_conf_ptr = NULL;
  614. /* if(old_block_info_ptr) */
  615. /* old_block_info_ptr->last_update = 0; */
  616. /* if(old_job_info_ptr) */
  617. /* old_job_info_ptr->last_update = 0; */
  618. /* if(old_node_info_ptr) */
  619. /* old_node_info_ptr->last_update = 0; */
  620. /* if(old_part_info_ptr) */
  621. /* old_part_info_ptr->last_update = 0; */
  622. /* if(old_res_info_ptr) */
  623. /* old_res_info_ptr->last_update = 0; */
  624. /* if(old_slurm_ctl_conf_ptr) */
  625. /* old_slurm_ctl_conf_ptr->last_update = 0; */
  626. }
  627. else if (strncasecmp (tag, "create", MAX(tag_len, 2)) == 0) {
  628. if (argc < 2) {
  629. exit_code = 1;
  630. fprintf (stderr, "too few arguments for %s keyword\n",
  631. tag);
  632. return 0;
  633. }
  634. _create_it ((argc - 1), &argv[1]);
  635. }
  636. else if (strncasecmp (tag, "details", MAX(tag_len, 1)) == 0) {
  637. if (argc > 1) {
  638. exit_code = 1;
  639. fprintf (stderr,
  640. "too many arguments for keyword:%s\n",
  641. tag);
  642. return 0;
  643. }
  644. detail_flag = 1;
  645. }
  646. else if (strncasecmp (tag, "script", MAX(tag_len, 3)) == 0) {
  647. if (argc > 1) {
  648. exit_code = 1;
  649. fprintf (stderr,
  650. "too many arguments for keyword:%s\n",
  651. tag);
  652. return 0;
  653. }
  654. detail_flag = 2;
  655. }
  656. else if (strncasecmp (tag, "exit", MAX(tag_len, 1)) == 0) {
  657. if (argc > 1) {
  658. exit_code = 1;
  659. fprintf (stderr,
  660. "too many arguments for keyword:%s\n",
  661. tag);
  662. }
  663. exit_flag = 1;
  664. }
  665. else if (strncasecmp (tag, "help", MAX(tag_len, 2)) == 0) {
  666. if (argc > 1) {
  667. exit_code = 1;
  668. fprintf (stderr,
  669. "too many arguments for keyword:%s\n",
  670. tag);
  671. }
  672. _usage ();
  673. }
  674. else if (strncasecmp (tag, "hide", MAX(tag_len, 2)) == 0) {
  675. all_flag = 0;
  676. detail_flag = 0;
  677. }
  678. else if (strncasecmp (tag, "oneliner", MAX(tag_len, 1)) == 0) {
  679. if (argc > 1) {
  680. exit_code = 1;
  681. fprintf (stderr,
  682. "too many arguments for keyword:%s\n",
  683. tag);
  684. }
  685. one_liner = 1;
  686. }
  687. else if (strncasecmp (tag, "pidinfo", MAX(tag_len, 3)) == 0) {
  688. if (argc > 2) {
  689. exit_code = 1;
  690. fprintf (stderr,
  691. "too many arguments for keyword:%s\n",
  692. tag);
  693. } else if (argc < 2) {
  694. exit_code = 1;
  695. fprintf (stderr,
  696. "missing argument for keyword:%s\n",
  697. tag);
  698. } else
  699. scontrol_pid_info ((pid_t) atol (argv[1]) );
  700. }
  701. else if (strncasecmp (tag, "ping", MAX(tag_len, 3)) == 0) {
  702. if (argc > 1) {
  703. exit_code = 1;
  704. fprintf (stderr,
  705. "too many arguments for keyword:%s\n",
  706. tag);
  707. }
  708. _print_ping ();
  709. }
  710. else if ((strncasecmp (tag, "\\q", 2) == 0) ||
  711. (strncasecmp (tag, "quiet", MAX(tag_len, 4)) == 0)) {
  712. if (argc > 1) {
  713. exit_code = 1;
  714. fprintf (stderr, "too many arguments for keyword:%s\n",
  715. tag);
  716. }
  717. quiet_flag = 1;
  718. }
  719. else if (strncasecmp (tag, "quit", MAX(tag_len, 4)) == 0) {
  720. if (argc > 1) {
  721. exit_code = 1;
  722. fprintf (stderr,
  723. "too many arguments for keyword:%s\n",
  724. tag);
  725. }
  726. exit_flag = 1;
  727. }
  728. else if (strncasecmp (tag, "reboot_nodes", MAX(tag_len, 3)) == 0) {
  729. if (argc > 2) {
  730. exit_code = 1;
  731. fprintf (stderr,
  732. "too many arguments for keyword:%s\n",
  733. tag);
  734. } else if (argc < 2) {
  735. error_code = _reboot_nodes("ALL");
  736. } else
  737. error_code = _reboot_nodes(argv[1]);
  738. if (error_code) {
  739. exit_code = 1;
  740. if (quiet_flag != 1)
  741. slurm_perror ("scontrol_reboot_nodes error");
  742. }
  743. }
  744. else if (strncasecmp (tag, "reconfigure", MAX(tag_len, 3)) == 0) {
  745. if (argc > 2) {
  746. exit_code = 1;
  747. fprintf (stderr, "too many arguments for keyword:%s\n",
  748. tag);
  749. }
  750. error_code = slurm_reconfigure();
  751. if (error_code) {
  752. exit_code = 1;
  753. if (quiet_flag != 1)
  754. slurm_perror ("slurm_reconfigure error");
  755. }
  756. }
  757. else if (strncasecmp (tag, "checkpoint", MAX(tag_len, 2)) == 0) {
  758. if (argc > 5) {
  759. exit_code = 1;
  760. if (quiet_flag != 1)
  761. fprintf(stderr,
  762. "too many arguments for keyword:%s\n",
  763. tag);
  764. }
  765. else if (argc < 3) {
  766. exit_code = 1;
  767. if (quiet_flag != 1)
  768. fprintf(stderr,
  769. "too few arguments for keyword:%s\n",
  770. tag);
  771. }
  772. else {
  773. error_code = scontrol_checkpoint(argv[1], argv[2],
  774. argc - 3, &argv[3]);
  775. if (error_code) {
  776. exit_code = 1;
  777. if (quiet_flag != 1)
  778. slurm_perror(
  779. "scontrol_checkpoint error");
  780. }
  781. }
  782. }
  783. else if (strncasecmp (tag, "requeue", MAX(tag_len, 3)) == 0) {
  784. if (argc > 2) {
  785. exit_code = 1;
  786. if (quiet_flag != 1)
  787. fprintf(stderr,
  788. "too many arguments for keyword:%s\n",
  789. tag);
  790. } else if (argc < 2) {
  791. exit_code = 1;
  792. if (quiet_flag != 1)
  793. fprintf(stderr,
  794. "too few arguments for keyword:%s\n",
  795. tag);
  796. } else {
  797. error_code = scontrol_requeue(argv[1]);
  798. if (error_code) {
  799. exit_code = 1;
  800. if (quiet_flag != 1)
  801. slurm_perror ("slurm_requeue error");
  802. }
  803. }
  804. }
  805. else if ((strncasecmp (tag, "hold", 4) == 0) ||
  806. (strncasecmp (tag, "holdu", 5) == 0) ||
  807. (strncasecmp (tag, "uhold", 5) == 0) ||
  808. (strncasecmp (tag, "release", MAX(tag_len, 3)) == 0)) {
  809. if (argc > 2) {
  810. exit_code = 1;
  811. if (quiet_flag != 1)
  812. fprintf(stderr,
  813. "too many arguments for keyword:%s\n",
  814. tag);
  815. }
  816. else if (argc < 2) {
  817. exit_code = 1;
  818. if (quiet_flag != 1)
  819. fprintf(stderr,
  820. "too few arguments for keyword:%s\n",
  821. tag);
  822. } else {
  823. error_code = scontrol_hold(argv[0], argv[1]);
  824. if (error_code) {
  825. exit_code = 1;
  826. if (quiet_flag != 1)
  827. slurm_perror ("slurm_update_job error");
  828. }
  829. }
  830. }
  831. else if ((strncasecmp (tag, "suspend", MAX(tag_len, 2)) == 0) ||
  832. (strncasecmp (tag, "resume", MAX(tag_len, 3)) == 0)) {
  833. if (argc > 2) {
  834. exit_code = 1;
  835. if (quiet_flag != 1)
  836. fprintf(stderr,
  837. "too many arguments for keyword:%s\n",
  838. tag);
  839. }
  840. else if (argc < 2) {
  841. exit_code = 1;
  842. if (quiet_flag != 1)
  843. fprintf(stderr,
  844. "too few arguments for keyword:%s\n",
  845. tag);
  846. } else {
  847. error_code = scontrol_suspend(argv[0], argv[1]);
  848. if (error_code) {
  849. exit_code = 1;
  850. if (quiet_flag != 1)
  851. slurm_perror ("slurm_suspend error");
  852. }
  853. }
  854. }
  855. else if (strncasecmp (tag, "wait_job", MAX(tag_len, 2)) == 0) {
  856. if (cluster_flags & CLUSTER_FLAG_CRAYXT) {
  857. fprintf(stderr,
  858. "wait_job is handled automatically on Cray.\n");
  859. } else if (argc > 2) {
  860. exit_code = 1;
  861. if (quiet_flag != 1)
  862. fprintf(stderr,
  863. "too many arguments for keyword:%s\n",
  864. tag);
  865. } else if (argc < 2) {
  866. exit_code = 1;
  867. if (quiet_flag != 1)
  868. fprintf(stderr,
  869. "too few arguments for keyword:%s\n",
  870. tag);
  871. } else {
  872. error_code = scontrol_job_ready(argv[1]);
  873. if (error_code)
  874. exit_code = 1;
  875. }
  876. }
  877. else if (strncasecmp (tag, "setdebugflags", MAX(tag_len, 9)) == 0) {
  878. if (argc > 2) {
  879. exit_code = 1;
  880. if (quiet_flag != 1)
  881. fprintf(stderr,
  882. "too many arguments for keyword:%s\n",
  883. tag);
  884. } else if (argc < 2) {
  885. exit_code = 1;
  886. if (quiet_flag != 1)
  887. fprintf(stderr,
  888. "too few arguments for keyword:%s\n",
  889. tag);
  890. } else {
  891. int i, mode = 0;
  892. uint32_t debug_flags_plus = 0;
  893. uint32_t debug_flags_minus = 0, flags;
  894. for (i = 1; i < argc; i++) {
  895. if (argv[i][0] == '+')
  896. mode = 1;
  897. else if (argv[i][0] == '-')
  898. mode = -1;
  899. else {
  900. mode = 0;
  901. break;
  902. }
  903. flags = debug_str2flags(&argv[i][1]);
  904. if (flags == NO_VAL)
  905. break;
  906. if (mode == 1)
  907. debug_flags_plus |= flags;
  908. else
  909. debug_flags_minus |= flags;
  910. }
  911. if (i < argc) {
  912. exit_code = 1;
  913. if (quiet_flag != 1) {
  914. fprintf(stderr, "invalid debug "
  915. "flag: %s\n", argv[i]);
  916. }
  917. if ((quiet_flag != 1) && (mode = 0)) {
  918. fprintf(stderr, "Usage: setdebugflags"
  919. " [+|-]NAME\n");
  920. }
  921. } else {
  922. error_code = slurm_set_debugflags(
  923. debug_flags_plus, debug_flags_minus);
  924. if (error_code) {
  925. exit_code = 1;
  926. if (quiet_flag != 1)
  927. slurm_perror(
  928. "slurm_set_debug_flags"
  929. " error");
  930. }
  931. }
  932. }
  933. }
  934. else if (strncasecmp (tag, "setdebug", MAX(tag_len, 2)) == 0) {
  935. if (argc > 2) {
  936. exit_code = 1;
  937. if (quiet_flag != 1)
  938. fprintf(stderr,
  939. "too many arguments for keyword:%s\n",
  940. tag);
  941. } else if (argc < 2) {
  942. exit_code = 1;
  943. if (quiet_flag != 1)
  944. fprintf(stderr,
  945. "too few arguments for keyword:%s\n",
  946. tag);
  947. } else {
  948. int level = -1;
  949. char *endptr;
  950. char *levels[] = {
  951. "quiet", "fatal", "error", "info", "verbose",
  952. "debug", "debug2", "debug3", "debug4",
  953. "debug5", NULL};
  954. int index = 0;
  955. while (levels[index]) {
  956. if (strcasecmp(argv[1], levels[index]) == 0) {
  957. level = index;
  958. break;
  959. }
  960. index ++;
  961. }
  962. if (level == -1) {
  963. /* effective levels: 0 - 9 */
  964. level = (int)strtoul (argv[1], &endptr, 10);
  965. if (*endptr != '\0' || level > 9) {
  966. level = -1;
  967. exit_code = 1;
  968. if (quiet_flag != 1)
  969. fprintf(stderr, "invalid "
  970. "debug level: %s\n",
  971. argv[1]);
  972. }
  973. }
  974. if (level != -1) {
  975. error_code = slurm_set_debug_level(
  976. level);
  977. if (error_code) {
  978. exit_code = 1;
  979. if (quiet_flag != 1)
  980. slurm_perror(
  981. "slurm_set_debug_level "
  982. "error");
  983. }
  984. }
  985. }
  986. }
  987. else if (strncasecmp (tag, "schedloglevel", MAX(tag_len, 3)) == 0) {
  988. if (argc > 2) {
  989. exit_code = 1;
  990. if (quiet_flag != 1)
  991. fprintf(stderr,
  992. "too many arguments for keyword:%s\n",
  993. tag);
  994. } else if (argc < 2) {
  995. exit_code = 1;
  996. if (quiet_flag != 1)
  997. fprintf(stderr,
  998. "too few arguments for keyword:%s\n",
  999. tag);
  1000. } else {
  1001. int level = -1;
  1002. char *endptr;
  1003. char *levels[] = {
  1004. "disable", "enable", NULL};
  1005. int index = 0;
  1006. while (levels[index]) {
  1007. if (strcasecmp(argv[1], levels[index]) == 0) {
  1008. level = index;
  1009. break;
  1010. }
  1011. index ++;
  1012. }
  1013. if (level == -1) {
  1014. /* effective levels: 0 - 1 */
  1015. level = (int)strtoul (argv[1], &endptr, 10);
  1016. if (*endptr != '\0' || level > 1) {
  1017. level = -1;
  1018. exit_code = 1;
  1019. if (quiet_flag != 1)
  1020. fprintf(stderr, "invalid schedlog "
  1021. "level: %s\n", argv[1]);
  1022. }
  1023. }
  1024. if (level != -1) {
  1025. error_code = slurm_set_schedlog_level(
  1026. level);
  1027. if (error_code) {
  1028. exit_code = 1;
  1029. if (quiet_flag != 1)
  1030. slurm_perror(
  1031. "slurm_set_schedlog_level"
  1032. " error");
  1033. }
  1034. }
  1035. }
  1036. }
  1037. else if (strncasecmp (tag, "show", MAX(tag_len, 3)) == 0) {
  1038. _show_it (argc, argv);
  1039. }
  1040. else if (strncasecmp (tag, "takeover", MAX(tag_len, 8)) == 0) {
  1041. char *secondary = NULL;
  1042. slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL;
  1043. slurm_ctl_conf_ptr = slurm_conf_lock();
  1044. secondary = xstrdup(slurm_ctl_conf_ptr->backup_controller);
  1045. slurm_conf_unlock();
  1046. if ( secondary && secondary[0] != '\0' ) {
  1047. error_code = slurm_takeover();
  1048. if (error_code) {
  1049. exit_code = 1;
  1050. if (quiet_flag != 1)
  1051. slurm_perror("slurm_takeover error");
  1052. }
  1053. } else {
  1054. fprintf(stderr, "slurm_takeover error: no backup "
  1055. "controller defined\n");
  1056. }
  1057. xfree(secondary);
  1058. }
  1059. else if (strncasecmp (tag, "shutdown", MAX(tag_len, 8)) == 0) {
  1060. /* require full command name */
  1061. uint16_t options = 0;
  1062. if (argc == 2) {
  1063. if (strcmp(argv[1], "slurmctld") &&
  1064. strcmp(argv[1], "controller")) {
  1065. error_code = 1;
  1066. exit_code = 1;
  1067. fprintf (stderr,
  1068. "invalid shutdown argument:%s\n",
  1069. argv[1]);
  1070. } else
  1071. options= 2;
  1072. } else if (argc > 2) {
  1073. error_code = 1;
  1074. exit_code = 1;
  1075. fprintf (stderr,
  1076. "too many arguments for keyword:%s\n",
  1077. tag);
  1078. }
  1079. if (error_code == 0) {
  1080. error_code = slurm_shutdown(options);
  1081. if (error_code) {
  1082. exit_code = 1;
  1083. if (quiet_flag != 1)
  1084. slurm_perror ("slurm_shutdown error");
  1085. }
  1086. }
  1087. }
  1088. else if (strncasecmp (tag, "update", MAX(tag_len, 1)) == 0) {
  1089. if (argc < 2) {
  1090. exit_code = 1;
  1091. fprintf (stderr, "too few arguments for %s keyword\n",
  1092. tag);
  1093. return 0;
  1094. }
  1095. _update_it ((argc - 1), &argv[1]);
  1096. }
  1097. else if (strncasecmp (tag, "delete", MAX(tag_len, 1)) == 0) {
  1098. if (argc < 2) {
  1099. exit_code = 1;
  1100. fprintf (stderr, "too few arguments for %s keyword\n",
  1101. tag);
  1102. return 0;
  1103. }
  1104. _delete_it ((argc - 1), &argv[1]);
  1105. }
  1106. else if (strncasecmp (tag, "verbose", MAX(tag_len, 4)) == 0) {
  1107. if (argc > 1) {
  1108. exit_code = 1;
  1109. fprintf (stderr,
  1110. "too many arguments for %s keyword\n",
  1111. tag);
  1112. }
  1113. quiet_flag = -1;
  1114. }
  1115. else if (strncasecmp (tag, "version", MAX(tag_len, 4)) == 0) {
  1116. if (argc > 1) {
  1117. exit_code = 1;
  1118. fprintf (stderr,
  1119. "too many arguments for %s keyword\n",
  1120. tag);
  1121. }
  1122. _print_version();
  1123. }
  1124. else if (strncasecmp (tag, "listpids", MAX(tag_len, 1)) == 0) {
  1125. if (argc > 3) {
  1126. exit_code = 1;
  1127. fprintf (stderr,
  1128. "too many arguments for keyword:%s\n",
  1129. tag);
  1130. } else {
  1131. scontrol_list_pids (argc == 1 ? NULL : argv[1],
  1132. argc <= 2 ? NULL : argv[2]);
  1133. }
  1134. }
  1135. else if (strncasecmp (tag, "notify", MAX(tag_len, 1)) == 0) {
  1136. if (argc < 3) {
  1137. exit_code = 1;
  1138. fprintf (stderr,
  1139. "too few arguments for keyword:%s\n",
  1140. tag);
  1141. } else if (scontrol_job_notify(argc-1, &argv[1])) {
  1142. exit_code = 1;
  1143. slurm_perror("job notify failure");
  1144. }
  1145. }
  1146. else {
  1147. exit_code = 1;
  1148. fprintf (stderr, "invalid keyword: %s\n", tag);
  1149. }
  1150. return 0;
  1151. }
  1152. /*
  1153. * _create_it - create a slurm configuration per the supplied arguments
  1154. * IN argc - count of arguments
  1155. * IN argv - list of arguments
  1156. */
  1157. static void
  1158. _create_it (int argc, char *argv[])
  1159. {
  1160. /* Scan for "res" first, anywhere in the args. When creating
  1161. a reservation there is a partition= option, which we don't
  1162. want to mistake for a requestion to create a partition. */
  1163. int i, error_code = SLURM_SUCCESS;
  1164. for (i=0; i<argc; i++) {
  1165. char *tag = argv[i];
  1166. char *val = strchr(argv[i], '=');
  1167. int tag_len;
  1168. if (val) {
  1169. tag_len = val - argv[i];
  1170. val++;
  1171. } else {
  1172. tag_len = strlen(tag);
  1173. }
  1174. if (!strncasecmp(tag, "ReservationName", MAX(tag_len, 3))) {
  1175. error_code = scontrol_create_res(argc, argv);
  1176. break;
  1177. } else if (!strncasecmp(tag, "PartitionName", MAX(tag_len, 3))) {
  1178. error_code = scontrol_create_part(argc, argv);
  1179. break;
  1180. }
  1181. }
  1182. if (i >= argc) {
  1183. exit_code = 1;
  1184. error("Invalid creation entity: %s", argv[0]);
  1185. } else if (error_code)
  1186. exit_code = 1;
  1187. }
  1188. /*
  1189. * _delete_it - delete the specified slurm entity
  1190. * IN argc - count of arguments
  1191. * IN argv - list of arguments
  1192. */
  1193. static void
  1194. _delete_it (int argc, char *argv[])
  1195. {
  1196. char *tag = NULL, *val = NULL;
  1197. int tag_len = 0;
  1198. if (argc != 1) {
  1199. error("Only one option follows delete. %d given.", argc);
  1200. exit_code = 1;
  1201. return;
  1202. }
  1203. tag = argv[0];
  1204. val = strchr(argv[0], '=');
  1205. if (val) {
  1206. tag_len = val - argv[0];
  1207. val++;
  1208. } else {
  1209. error("Proper format is 'delete Partition=p'"
  1210. " or 'delete Reservation=r'");
  1211. exit_code = 1;
  1212. return;
  1213. }
  1214. /* First identify the entity type to delete */
  1215. if (strncasecmp (tag, "PartitionName", MAX(tag_len, 3)) == 0) {
  1216. delete_part_msg_t part_msg;
  1217. part_msg.name = val;
  1218. if (slurm_delete_partition(&part_msg)) {
  1219. char errmsg[64];
  1220. snprintf(errmsg, 64, "delete_partition %s", argv[0]);
  1221. slurm_perror(errmsg);
  1222. }
  1223. } else if (strncasecmp (tag, "ReservationName", MAX(tag_len, 3)) == 0) {
  1224. reservation_name_msg_t res_msg;
  1225. res_msg.name = val;
  1226. if (slurm_delete_reservation(&res_msg)) {
  1227. char errmsg[64];
  1228. snprintf(errmsg, 64, "delete_reservation %s", argv[0]);
  1229. slurm_perror(errmsg);
  1230. }
  1231. } else if (strncasecmp (tag, "BlockName", MAX(tag_len, 3)) == 0) {
  1232. if(cluster_flags & CLUSTER_FLAG_BG) {
  1233. update_block_msg_t block_msg;
  1234. slurm_init_update_block_msg ( &block_msg );
  1235. block_msg.bg_block_id = val;
  1236. block_msg.state = BG_BLOCK_NAV;
  1237. if (slurm_update_block(&block_msg)) {
  1238. char errmsg[64];
  1239. snprintf(errmsg, 64, "delete_block %s",
  1240. argv[0]);
  1241. slurm_perror(errmsg);
  1242. }
  1243. } else {
  1244. exit_code = 1;
  1245. fprintf(stderr,
  1246. "This only works on a bluegene system.\n");
  1247. }
  1248. } else {
  1249. exit_code = 1;
  1250. fprintf(stderr, "Invalid deletion entity: %s\n", argv[0]);
  1251. }
  1252. }
  1253. /*
  1254. * _show_it - print a description of the specified slurm entity
  1255. * IN argc - count of arguments
  1256. * IN argv - list of arguments
  1257. */
  1258. static void
  1259. _show_it (int argc, char *argv[])
  1260. {
  1261. char *tag = NULL, *val = NULL;
  1262. int tag_len = 0;
  1263. if (argc > 3) {
  1264. exit_code = 1;
  1265. if (quiet_flag != 1)
  1266. fprintf(stderr,
  1267. "too many arguments for keyword:%s\n",
  1268. argv[0]);
  1269. return;
  1270. }
  1271. else if (argc < 2) {
  1272. exit_code = 1;
  1273. if (quiet_flag != 1)
  1274. fprintf(stderr,
  1275. "too few arguments for keyword:%s\n", argv[0]);
  1276. return;
  1277. }
  1278. tag = argv[1];
  1279. tag_len = strlen(tag);
  1280. val = strchr(argv[1], '=');
  1281. if (val) {
  1282. tag_len = val - argv[1];
  1283. val++;
  1284. } else if (argc == 3) {
  1285. val = argv[2];
  1286. } else {
  1287. val = NULL;
  1288. }
  1289. if (strncasecmp (tag, "aliases", MAX(tag_len, 1)) == 0) {
  1290. if (val)
  1291. _print_aliases (val);
  1292. else
  1293. _print_aliases (NULL);
  1294. } else if (strncasecmp (tag, "blocks", MAX(tag_len, 1)) == 0) {
  1295. scontrol_print_block (val);
  1296. } else if (strncasecmp (tag, "config", MAX(tag_len, 1)) == 0) {
  1297. _print_config (val);
  1298. } else if (strncasecmp (tag, "daemons", MAX(tag_len, 1)) == 0) {
  1299. if (val) {
  1300. exit_code = 1;
  1301. if (quiet_flag != 1)
  1302. fprintf(stderr,
  1303. "too many arguments for keyword:%s\n",
  1304. argv[0]);
  1305. }
  1306. _print_daemons ();
  1307. } else if (strncasecmp (tag, "FrontendName", MAX(tag_len, 1)) == 0) {
  1308. scontrol_print_front_end_list(val);
  1309. } else if (strncasecmp (tag, "hostnames", MAX(tag_len, 5)) == 0) {
  1310. if (val)
  1311. scontrol_print_hosts(val);
  1312. else
  1313. scontrol_print_hosts(getenv("SLURM_NODELIST"));
  1314. } else if (strncasecmp (tag, "hostlist", MAX(tag_len, 5)) == 0) {
  1315. if (!val) {
  1316. exit_code = 1;
  1317. fprintf(stderr, "invalid encode argument\n");
  1318. _usage();
  1319. } else if (scontrol_encode_hostlist(val))
  1320. exit_code = 1;
  1321. } else if (strncasecmp (tag, "jobs", MAX(tag_len, 1)) == 0 ||
  1322. strncasecmp (tag, "jobid", MAX(tag_len, 1)) == 0 ) {
  1323. scontrol_print_job (val);
  1324. } else if (strncasecmp (tag, "nodes", MAX(tag_len, 1)) == 0) {
  1325. scontrol_print_node_list (val);
  1326. } else if (strncasecmp (tag, "partitions", MAX(tag_len, 1)) == 0 ||
  1327. strncasecmp (tag, "partitionname", MAX(tag_len, 1)) == 0) {
  1328. scontrol_print_part (val);
  1329. } else if (strncasecmp (tag, "reservations", MAX(tag_len, 1)) == 0 ||
  1330. strncasecmp (tag, "reservationname", MAX(tag_len, 1)) == 0) {
  1331. scontrol_print_res (val);
  1332. } else if (strncasecmp (tag, "slurmd", MAX(tag_len, 2)) == 0) {
  1333. _print_slurmd (val);
  1334. } else if (strncasecmp (tag, "steps", MAX(tag_len, 2)) == 0) {
  1335. scontrol_print_step (val);
  1336. } else if (strncasecmp (tag, "topology", MAX(tag_len, 1)) == 0) {
  1337. scontrol_print_topo (val);
  1338. } else {
  1339. exit_code = 1;
  1340. if (quiet_flag != 1)
  1341. fprintf (stderr,
  1342. "invalid entity:%s for keyword:%s \n",
  1343. tag, argv[0]);
  1344. }
  1345. }
  1346. /*
  1347. * _update_it - update the slurm configuration per the supplied arguments
  1348. * IN argc - count of arguments
  1349. * IN argv - list of arguments
  1350. */
  1351. static void
  1352. _update_it (int argc, char *argv[])
  1353. {
  1354. char *val = NULL;
  1355. int i, error_code = SLURM_SUCCESS;
  1356. int node_tag = 0, part_tag = 0, job_tag = 0;
  1357. int block_tag = 0, sub_tag = 0, res_tag = 0;
  1358. int debug_tag = 0, step_tag = 0, front_end_tag = 0;
  1359. /* First identify the entity to update */
  1360. for (i=0; i<argc; i++) {
  1361. char *tag = argv[i];
  1362. int tag_len = 0;
  1363. val = strchr(argv[i], '=');
  1364. if (!val)
  1365. continue;
  1366. tag_len = val - argv[i];
  1367. val++;
  1368. if (!strncasecmp(tag, "NodeName", MAX(tag_len, 3))) {
  1369. node_tag = 1;
  1370. } else if (!strncasecmp(tag, "PartitionName",
  1371. MAX(tag_len, 3))) {
  1372. part_tag = 1;
  1373. } else if (!strncasecmp(tag, "JobId", MAX(tag_len, 3))) {
  1374. job_tag = 1;
  1375. } else if (!strncasecmp(tag, "StepId", MAX(tag_len, 4))) {
  1376. step_tag = 1;
  1377. } else if (!strncasecmp(tag, "BlockName", MAX(tag_len, 3))) {
  1378. block_tag = 1;
  1379. } else if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 3))
  1380. || !strncasecmp(tag, "SubMPName", MAX(tag_len, 3))) {
  1381. sub_tag = 1;
  1382. } else if (!strncasecmp(tag, "FrontendName",
  1383. MAX(tag_len, 2))) {
  1384. front_end_tag = 1;
  1385. } else if (!strncasecmp(tag, "ReservationName",
  1386. MAX(tag_len, 3))) {
  1387. res_tag = 1;
  1388. } else if (!strncasecmp(tag, "SlurmctldDebug",
  1389. MAX(tag_len, 2))) {
  1390. debug_tag= 1;
  1391. }
  1392. }
  1393. /* The order of tests matters here. An update job request can include
  1394. * partition and reservation tags, possibly before the jobid tag, but
  1395. * none of the other updates have a jobid tag, so check jobtag first.
  1396. * Likewise, check restag next, because reservations can have a
  1397. * partition tag. The order of the rest doesn't matter because there
  1398. * aren't any other duplicate tags. */
  1399. if (job_tag)
  1400. error_code = scontrol_update_job (argc, argv);
  1401. else if (step_tag)
  1402. error_code = scontrol_update_step (argc, argv);
  1403. else if (res_tag)
  1404. error_code = scontrol_update_res (argc, argv);
  1405. else if (node_tag)
  1406. error_code = scontrol_update_node (argc, argv);
  1407. else if (front_end_tag)
  1408. error_code = scontrol_update_front_end (argc, argv);
  1409. else if (part_tag)
  1410. error_code = scontrol_update_part (argc, argv);
  1411. else if (block_tag)
  1412. error_code = _update_bluegene_block (argc, argv);
  1413. else if (sub_tag)
  1414. error_code = _update_bluegene_subbp (argc, argv);
  1415. else if (debug_tag)
  1416. error_code = _update_slurmctld_debug(val);
  1417. else {
  1418. exit_code = 1;
  1419. fprintf(stderr, "No valid entity in update command\n");
  1420. fprintf(stderr, "Input line must include \"NodeName\", ");
  1421. if(cluster_flags & CLUSTER_FLAG_BG) {
  1422. fprintf(stderr, "\"BlockName\", \"SubMPName\" "
  1423. "(i.e. bgl000[0-3]),");
  1424. }
  1425. fprintf(stderr, "\"PartitionName\", \"Reservation\", "
  1426. "\"JobId\", or \"SlurmctldDebug\" \n");
  1427. }
  1428. if (error_code) {
  1429. exit_code = 1;
  1430. slurm_perror ("slurm_update error");
  1431. }
  1432. }
  1433. /*
  1434. * _update_bluegene_block - update the bluegene block per the
  1435. * supplied arguments
  1436. * IN argc - count of arguments
  1437. * IN argv - list of arguments
  1438. * RET 0 if no slurm error, errno otherwise. parsing error prints
  1439. * error message and returns 0
  1440. */
  1441. static int
  1442. _update_bluegene_block (int argc, char *argv[])
  1443. {
  1444. int i, update_cnt = 0;
  1445. update_block_msg_t block_msg;
  1446. if(!(cluster_flags & CLUSTER_FLAG_BG)) {
  1447. exit_code = 1;
  1448. fprintf(stderr, "This only works on a bluegene system.\n");
  1449. return 0;
  1450. }
  1451. slurm_init_update_block_msg ( &block_msg );
  1452. for (i=0; i<argc; i++) {
  1453. char *tag = argv[i];
  1454. char *val = strchr(argv[i], '=');
  1455. int tag_len = 0, vallen = 0;
  1456. if (val) {
  1457. tag_len = val - argv[i];
  1458. val++;
  1459. vallen = strlen(val);
  1460. } else {
  1461. exit_code = 1;
  1462. error("Invalid input for BlueGene block "
  1463. "update %s",
  1464. argv[i]);
  1465. return 0;
  1466. }
  1467. if (!strncasecmp(tag, "BlockName", MAX(tag_len, 2))) {
  1468. block_msg.bg_block_id = val;
  1469. } else if (!strncasecmp(tag, "State", MAX(tag_len, 2))) {
  1470. if (!strncasecmp(val, "ERROR", MAX(vallen, 1)))
  1471. block_msg.state = BG_BLOCK_ERROR_FLAG;
  1472. else if (!strncasecmp(val, "FREE", MAX(vallen, 1)))
  1473. block_msg.state = BG_BLOCK_FREE;
  1474. else if (!strncasecmp(val, "RECREATE", MAX(vallen, 3)))
  1475. block_msg.state = BG_BLOCK_BOOTING;
  1476. else if (!strncasecmp(val, "REMOVE", MAX(vallen, 3)))
  1477. block_msg.state = BG_BLOCK_NAV;
  1478. else if (!strncasecmp(val, "RESUME", MAX(vallen, 3)))
  1479. block_msg.state = BG_BLOCK_TERM;
  1480. else {
  1481. exit_code = 1;
  1482. fprintf (stderr, "Invalid input: %s\n",
  1483. argv[i]);
  1484. fprintf (stderr,
  1485. "Acceptable State values "
  1486. "are ERROR, FREE, RECREATE, "
  1487. "REMOVE, RESUME\n");
  1488. return 0;
  1489. }
  1490. update_cnt++;
  1491. } else {
  1492. exit_code = 1;
  1493. error("Invalid input for BlueGene block update %s",
  1494. argv[i]);
  1495. return 0;
  1496. }
  1497. }
  1498. if(!block_msg.bg_block_id) {
  1499. error("You didn't supply a block name.");
  1500. return 0;
  1501. } else if (block_msg.state == (uint16_t)NO_VAL) {
  1502. error("You didn't give me a state to set %s to "
  1503. "(i.e. FREE, ERROR).", block_msg.mp_str);
  1504. return 0;
  1505. }
  1506. if (slurm_update_block(&block_msg)) {
  1507. exit_code = 1;
  1508. return slurm_get_errno ();
  1509. } else
  1510. return 0;
  1511. }
  1512. /*
  1513. * _update_bluegene_subbp - update the bluegene nodecards per the
  1514. * supplied arguments
  1515. * IN argc - count of arguments
  1516. * IN argv - list of arguments
  1517. * RET 0 if no slurm error, errno otherwise. parsing error prints
  1518. * error message and returns 0
  1519. */
  1520. static int
  1521. _update_bluegene_subbp (int argc, char *argv[])
  1522. {
  1523. int i, update_cnt = 0;
  1524. update_block_msg_t block_msg;
  1525. if(!(cluster_flags & CLUSTER_FLAG_BG)) {
  1526. exit_code = 1;
  1527. fprintf(stderr, "This only works on a bluegene system.\n");
  1528. return 0;
  1529. }
  1530. slurm_init_update_block_msg ( &block_msg );
  1531. for (i=0; i<argc; i++) {
  1532. char *tag = argv[i];
  1533. char *val = strchr(argv[i], '=');
  1534. int tag_len = 0, vallen = 0;
  1535. if (val) {
  1536. tag_len = val - argv[i];
  1537. val++;
  1538. vallen = strlen(val);
  1539. } else {
  1540. exit_code = 1;
  1541. error("Invalid input for BlueGene SubMPName update %s",
  1542. argv[i]);
  1543. return 0;
  1544. }
  1545. if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 2))
  1546. || !strncasecmp(tag, "SubMPName", MAX(tag_len, 2)))
  1547. block_msg.mp_str = val;
  1548. else if (!strncasecmp(tag, "State", MAX(tag_len, 2))) {
  1549. if (!strncasecmp(val, "ERROR", MAX(vallen, 1)))
  1550. block_msg.state = BG_BLOCK_ERROR_FLAG;
  1551. else if (!strncasecmp(val, "FREE", MAX(vallen, 1)))
  1552. block_msg.state = BG_BLOCK_FREE;
  1553. else {
  1554. exit_code = 1;
  1555. fprintf (stderr, "Invalid input: %s\n",
  1556. argv[i]);
  1557. fprintf (stderr, "Acceptable State values "
  1558. "are FREE and ERROR\n");
  1559. return 0;
  1560. }
  1561. update_cnt++;
  1562. } else {
  1563. exit_code = 1;
  1564. error("Invalid input for BlueGene SubMPName update %s",
  1565. argv[i]);
  1566. return 0;
  1567. }
  1568. }
  1569. if(!block_msg.mp_str) {
  1570. error("You didn't supply an ionode list.");
  1571. return 0;
  1572. } else if (block_msg.state == (uint16_t)NO_VAL) {
  1573. error("You didn't give me a state to set %s to "
  1574. "(i.e. FREE, ERROR).", block_msg.mp_str);
  1575. return 0;
  1576. }
  1577. if (slurm_update_block(&block_msg)) {
  1578. exit_code = 1;
  1579. return slurm_get_errno ();
  1580. } else
  1581. return 0;
  1582. }
  1583. /*
  1584. * _update_slurmctld_debug - update the slurmctld debug level
  1585. * IN val - new value
  1586. * RET 0 if no slurm error, errno otherwise. parsing error prints
  1587. * error message and returns 0
  1588. */
  1589. static int _update_slurmctld_debug(char *val)
  1590. {
  1591. char *endptr;
  1592. int error_code = SLURM_SUCCESS;
  1593. uint32_t level = (uint32_t)strtoul(val, &endptr, 10);
  1594. if (*endptr != '\0' || level > 9) {
  1595. error_code = 1;
  1596. if (quiet_flag != 1)
  1597. fprintf(stderr, "invalid debug level: %s\n",
  1598. val);
  1599. } else {
  1600. error_code = slurm_set_debug_level(level);
  1601. }
  1602. return error_code;
  1603. }
  /* _usage - show the valid scontrol commands
   *
   * Writes the help text to stdout. The text is built from adjacent string
   * literals (one per output line) and contains no conversion
   * specifications, so it is emitted verbatim with fputs(). */
  void
  _usage () {
      fputs(
          "scontrol [<OPTION>] [<COMMAND>] \n"
          "Valid <OPTION> values are: \n"
          "-a or --all: equivalent to \"all\" command \n"
          "-d or --details: equivalent to \"details\" command \n"
          "-h or --help: equivalent to \"help\" command \n"
          "--hide: equivalent to \"hide\" command \n"
          "-M or --cluster: equivalent to \"cluster\" command \n"
          "-o or --oneliner: equivalent to \"oneliner\" command \n"
          "-Q or --quiet: equivalent to \"quiet\" command \n"
          "-v or --verbose: equivalent to \"verbose\" command \n"
          "-V or --version: equivalent to \"version\" command \n"
          "\n"
          "<keyword> may be omitted from the execute line and scontrol will execute \n"
          "in interactive mode. It will process commands as entered until explicitly\n"
          "terminated. \n"
          "\n"
          "Valid <COMMAND> values are: \n"
          "abort shutdown slurm controller immediately \n"
          "generating a core file. \n"
          "all display information about all partitions, \n"
          "including hidden partitions. \n"
          "cluster cluster to issue commands to. Default is \n"
          "current cluster. cluster with no name will \n"
          "reset to default. \n"
          "checkpoint <CH_OP><ID> perform a checkpoint operation on identified \n"
          "job or job step \n"
          "completing display jobs in completing state along with \n"
          "their completing or down nodes \n"
          "create <SPECIFICATIONS> create a new partition or reservation \n"
          "details evokes additional details from the \"show\" \n"
          "command \n"
          "delete <SPECIFICATIONS> delete the specified partition or reservation\n"
          "On Dynamic layout Bluegene systems you can also\n"
          "delete blocks. \n"
          "exit terminate scontrol \n"
          "help print this description of use. \n"
          "hold <job_id> prevent specified job from starting (see release)\n"
          "holdu <job_id> place user hold on specified job (see release)\n"
          "hide do not display information about hidden \n"
          "partitions \n"
          "listpids <job_id<.step>> List pids associated with the given jobid, or\n"
          "all jobs if no id is given (This will only \n"
          "display the processes on the node which the \n"
          "scontrol is run on, and only for those \n"
          "processes spawned by SLURM and their \n"
          "descendants) \n"
          "notify <job_id> msg send message to specified job \n"
          "oneliner report output one record per line. \n"
          "pidinfo <pid> return slurm job information for given pid. \n"
          "ping print status of slurmctld daemons. \n"
          "quiet print no messages other than error messages. \n"
          "quit terminate this command. \n"
          "reboot_nodes [<nodelist>] reboot the nodes when they become idle. \n"
          "By default all nodes are rebooted. \n"
          "reconfigure re-read configuration files. \n"
          "release <job_id> permit specified job to start (see hold) \n"
          "requeue <job_id> re-queue a batch job \n"
          "resume <job_id> resume previously suspended job (see suspend)\n"
          "setdebug <level> set slurmctld debug level \n"
          "setdebugflags [+|-]<flag> add or remove slurmctld DebugFlags \n"
          "schedloglevel <slevel> set scheduler log level \n"
          "show <ENTITY> [<ID>] display state of identified entity, default \n"
          "is all records. \n"
          "shutdown <OPTS> shutdown slurm daemons \n"
          "(the primary controller will be stopped) \n"
          "suspend <job_id> suspend specified job (see resume) \n"
          "takeover ask slurm backup controller to take over \n"
          "uhold <job_id> place user hold on specified job (see release)\n"
          "update <SPECIFICATIONS> update job, node, partition, reservation, \n"
          "step or bluegene block/subbp configuration \n"
          "verbose enable detailed logging. \n"
          "version display tool version number. \n"
          "wait_job <job_id> wait until the nodes allocated to the job \n"
          "are booted and usable \n"
          "!! Repeat the last command entered. \n"
          "\n"
          "<ENTITY> may be \"aliases\", \"config\", \"daemons\", \"frontend\", \n"
          "\"hostlist\", \"hostnames\", \"job\", \"node\", \"partition\", \n"
          "\"reservation\", \"slurmd\", \"step\", or \"topology\" \n"
          "(also for BlueGene only: \"block\" or \"subbp\"). \n"
          "\n"
          "<ID> may be a configuration parameter name, job id, node name, partition \n"
          "name, reservation name, job step id, or hostlist or pathname to a \n"
          "list of host names. \n"
          "\n"
          "<HOSTLIST> may either be a comma separated list of host names or the \n"
          "absolute pathname of a file (with leading '/') containing host names \n"
          "either separated by commas or new-lines \n"
          "\n"
          "<LEVEL> may be an integer value like SlurmctldDebug in the slurm.conf \n"
          "file or the name of the most detailed errors to report (e.g. \"info\",\n"
          "\"verbose\", \"debug\", \"debug2\", etc.). \n"
          "\n"
          "<SLEVEL> may be an integer value like SlurmSchedLogLevel in the \n"
          "slurm.conf file or \"enable\" or \"disable\". \n"
          "\n"
          "<OPTS> may be \"slurmctld\" to shutdown just the slurmctld daemon, \n"
          "otherwise all slurm daemons are shutdown \n"
          "\n"
          "Node names may be specified using simple range expressions, \n"
          "(e.g. \"lx[10-20]\" corresponds to lx10, lx11, lx12, ...) \n"
          "The job step id is the job id followed by a period and the step id. \n"
          "\n"
          "<SPECIFICATIONS> are specified in the same format as the configuration \n"
          "file. You may wish to use the \"show\" keyword then use its output as \n"
          "input for the update keyword, editing as needed. Bluegene blocks/subbps \n"
          "are only able to be set to an error or free state. You can also remove \n"
          "blocks by specifying 'remove' as the state. The remove option is only \n"
          "valid on Dynamic layout systems. \n"
          "(Bluegene systems only) \n"
          "\n"
          "<CH_OP> identify checkpoint operations and may be \"able\", \"disable\", \n"
          "\"enable\", \"create\", \"vacate\", \"requeue\", \"restart\", or \"error\"\n"
          "Additional options include \"ImageDir=<dir>\", \"MaxWait=<seconds>\" and \n"
          "\"StickToNodes\" \n"
          "\n"
          "All commands and options are case-insensitive, although node names and \n"
          "partition names tests are case-sensitive (node names \"LX\" and \"lx\" \n"
          "are distinct). \n\n",
          stdout);
  }