PageRenderTime 67ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/src/srun/libsrun/opt.c

https://github.com/cfenoy/slurm
C | 2668 lines | 2263 code | 212 blank | 193 comment | 428 complexity | 0aea047511aa93a759b9be877dfd2ded MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * opt.c - options processing for srun
  3. *****************************************************************************
  4. * Copyright (C) 2002-2007 The Regents of the University of California.
  5. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  6. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  7. * Written by Mark Grondona <grondona1@llnl.gov>, et. al.
  8. * CODE-OCEC-09-009. All rights reserved.
  9. *
  10. * This file is part of SLURM, a resource management program.
  11. * For details, see <http://www.schedmd.com/slurmdocs/>.
  12. * Please also read the included file: DISCLAIMER.
  13. *
  14. * SLURM is free software; you can redistribute it and/or modify it under
  15. * the terms of the GNU General Public License as published by the Free
  16. * Software Foundation; either version 2 of the License, or (at your option)
  17. * any later version.
  18. *
  19. * In addition, as a special exception, the copyright holders give permission
  20. * to link the code of portions of this program with the OpenSSL library under
  21. * certain conditions as described in each individual source file, and
  22. * distribute linked combinations including the two. You must obey the GNU
  23. * General Public License in all respects for all of the code used other than
  24. * OpenSSL. If you modify file(s) with this exception, you may extend this
  25. * exception to your version of the file(s), but you are not obligated to do
  26. * so. If you do not wish to do so, delete this exception statement from your
  27. * version. If you delete this exception statement from all source files in
  28. * the program, then also delete it here.
  29. *
  30. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  31. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  32. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  33. * details.
  34. *
  35. * You should have received a copy of the GNU General Public License along
  36. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  37. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  38. \*****************************************************************************/
  39. #if HAVE_CONFIG_H
  40. # include "config.h"
  41. #endif
  42. #include <string.h> /* strcpy, strncasecmp */
  43. #ifdef HAVE_STRINGS_H
  44. # include <strings.h>
  45. #endif
  46. #ifndef _GNU_SOURCE
  47. # define _GNU_SOURCE
  48. #endif
  49. #if HAVE_GETOPT_H
  50. # include <getopt.h>
  51. #else
  52. # include "src/common/getopt.h"
  53. #endif
  54. #ifdef HAVE_LIMITS_H
  55. # include <limits.h>
  56. #endif
  57. #include <fcntl.h>
  58. #include <stdarg.h> /* va_start */
  59. #include <stdio.h>
  60. #include <stdlib.h> /* getenv */
  61. #include <sys/param.h> /* MAXPATHLEN */
  62. #include <unistd.h>
  63. #include <sys/types.h>
  64. #include <sys/utsname.h>
  65. #include "src/common/cpu_frequency.h"
  66. #include "src/common/list.h"
  67. #include "src/common/log.h"
  68. #include "src/common/mpi.h"
  69. #include "src/common/optz.h"
  70. #include "src/common/parse_time.h"
  71. #include "src/common/plugstack.h"
  72. #include "src/common/proc_args.h"
  73. #include "src/common/slurm_protocol_api.h"
  74. #include "src/common/slurm_protocol_interface.h"
  75. #include "src/common/slurm_rlimits_info.h"
  76. #include "src/common/slurm_resource_info.h"
  77. #include "src/common/uid.h"
  78. #include "src/common/xmalloc.h"
  79. #include "src/common/xstring.h"
  80. #include "src/api/pmi_server.h"
  81. #include "debugger.h"
  82. #include "launch.h"
  83. #include "multi_prog.h"
  84. #include "opt.h"
/*
 * generic OPT_ definitions -- mainly for use with env vars
 *
 * These values populate the "type" member of the env_vars[] table and are
 * what _process_env_var() switches on to decide how an environment
 * variable's value is interpreted.  Types without a matching case in that
 * switch fall through to its default branch and are ignored.
 * Value 0x07 is not assigned.
 */
#define OPT_NONE 0x00
#define OPT_INT 0x01
#define OPT_STRING 0x02
#define OPT_IMMEDIATE 0x03
#define OPT_DISTRIB 0x04
#define OPT_NODES 0x05
#define OPT_OVERCOMMIT 0x06
#define OPT_CONN_TYPE 0x08
#define OPT_RESV_PORTS 0x09
#define OPT_NO_ROTATE 0x0a
#define OPT_GEOMETRY 0x0b
#define OPT_MPI 0x0c
#define OPT_CPU_BIND 0x0d
#define OPT_MEM_BIND 0x0e
#define OPT_MULTI 0x0f
#define OPT_NSOCKETS 0x10
#define OPT_NCORES 0x11
#define OPT_NTHREADS 0x12
#define OPT_EXCLUSIVE 0x13
#define OPT_OPEN_MODE 0x14
#define OPT_ACCTG_FREQ 0x15
#define OPT_WCKEY 0x16
#define OPT_SIGNAL 0x17
#define OPT_TIME_VAL 0x18
#define OPT_CPU_FREQ 0x19
/*
 * generic getopt_long flags, integers and *not* valid characters
 *
 * Each value is used as the "val" member of an entry in the long_options[]
 * table for an option that has no single-letter form.  All values are
 * 0x100 or above, i.e. outside the range of a one-byte option character,
 * so they cannot be confused with the short options.  The numbering has
 * gaps; only uniqueness matters.
 */
#define LONG_OPT_HELP 0x100
#define LONG_OPT_USAGE 0x101
#define LONG_OPT_XTO 0x102
#define LONG_OPT_LAUNCH 0x103
#define LONG_OPT_TIMEO 0x104
#define LONG_OPT_JOBID 0x105
#define LONG_OPT_TMP 0x106
#define LONG_OPT_MEM 0x107
#define LONG_OPT_MINCPUS 0x108
#define LONG_OPT_CONT 0x109
#define LONG_OPT_UID 0x10a
#define LONG_OPT_GID 0x10b
#define LONG_OPT_MPI 0x10c
#define LONG_OPT_RESV_PORTS 0x10d
#define LONG_OPT_DEBUG_TS 0x110
#define LONG_OPT_CONNTYPE 0x111
#define LONG_OPT_TEST_ONLY 0x113
#define LONG_OPT_NETWORK 0x114
#define LONG_OPT_EXCLUSIVE 0x115
#define LONG_OPT_PROPAGATE 0x116
#define LONG_OPT_PROLOG 0x117
#define LONG_OPT_EPILOG 0x118
#define LONG_OPT_BEGIN 0x119
#define LONG_OPT_MAIL_TYPE 0x11a
#define LONG_OPT_MAIL_USER 0x11b
#define LONG_OPT_TASK_PROLOG 0x11c
#define LONG_OPT_TASK_EPILOG 0x11d
#define LONG_OPT_NICE 0x11e
#define LONG_OPT_CPU_BIND 0x11f
#define LONG_OPT_MEM_BIND 0x120
#define LONG_OPT_MULTI 0x122
#define LONG_OPT_COMMENT 0x124
#define LONG_OPT_QOS 0x127
#define LONG_OPT_SOCKETSPERNODE 0x130
#define LONG_OPT_CORESPERSOCKET 0x131
#define LONG_OPT_THREADSPERCORE 0x132
#define LONG_OPT_MINSOCKETS 0x133
#define LONG_OPT_MINCORES 0x134
#define LONG_OPT_MINTHREADS 0x135
#define LONG_OPT_NTASKSPERNODE 0x136
#define LONG_OPT_NTASKSPERSOCKET 0x137
#define LONG_OPT_NTASKSPERCORE 0x138
#define LONG_OPT_MEM_PER_CPU 0x13a
#define LONG_OPT_HINT 0x13b
#define LONG_OPT_BLRTS_IMAGE 0x140
#define LONG_OPT_LINUX_IMAGE 0x141
#define LONG_OPT_MLOADER_IMAGE 0x142
#define LONG_OPT_RAMDISK_IMAGE 0x143
#define LONG_OPT_REBOOT 0x144
#define LONG_OPT_GET_USER_ENV 0x145
#define LONG_OPT_PTY 0x146
#define LONG_OPT_CHECKPOINT 0x147
#define LONG_OPT_CHECKPOINT_DIR 0x148
#define LONG_OPT_OPEN_MODE 0x149
#define LONG_OPT_ACCTG_FREQ 0x14a
#define LONG_OPT_WCKEY 0x14b
#define LONG_OPT_RESERVATION 0x14c
#define LONG_OPT_RESTART_DIR 0x14d
#define LONG_OPT_SIGNAL 0x14e
#define LONG_OPT_DEBUG_SLURMD 0x14f
#define LONG_OPT_TIME_MIN 0x150
#define LONG_OPT_GRES 0x151
#define LONG_OPT_ALPS 0x152
#define LONG_OPT_REQ_SWITCH 0x153
#define LONG_OPT_LAUNCHER_OPTS 0x154
#define LONG_OPT_CPU_FREQ 0x155
#define LONG_OPT_LAUNCH_CMD 0x156
extern char **environ;
/*---- global variables, defined in opt.h ----*/
/* verbosity level; initialize_and_process_args() dumps all options when > 3 */
int _verbose;
/* the option set filled in from defaults, environment and command line */
opt_t opt;
/* exit status used by this file whenever option processing fails */
int error_exit = 1;
int immediate_exit = 1;
/* MPI plugin name taken from SLURM_MPI_TYPE (see _process_env_var) */
char *mpi_type = NULL;
/*---- forward declarations of static functions ----*/
/* set once mpi_hook_client_init() has succeeded for the requested type */
static bool mpi_initialized = false;
/* env_vars_t is completed by the struct env_vars definition further down */
typedef struct env_vars env_vars_t;
/* Get a decimal integer from arg */
static int _get_int(const char *arg, const char *what, bool positive);
static void _help(void);
/* fill in default options */
static void _opt_default(void);
/* set options based upon env vars */
static void _opt_env(void);
/* set options based upon the command line */
static void _opt_args(int argc, char **argv);
/* list known options and their settings */
static void _opt_list(void);
/* verify options sanity */
static bool _opt_verify(void);
/* apply one environment variable according to its env_vars[] entry */
static void _process_env_var(env_vars_t *e, const char *val);
static bool _under_parallel_debugger(void);
static void _usage(void);
/* check (and possibly rewrite) a node list against the requested counts */
static bool _valid_node_list(char **node_list_pptr);
/*---[ end forward declarations of static functions ]---------------------*/
  206. int initialize_and_process_args(int argc, char *argv[])
  207. {
  208. /* initialize option defaults */
  209. _opt_default();
  210. /* initialize options with env vars */
  211. _opt_env();
  212. /* initialize options with argv */
  213. _opt_args(argc, argv);
  214. if (!_opt_verify())
  215. exit(error_exit);
  216. if (_verbose > 3)
  217. _opt_list();
  218. if (opt.launch_cmd) {
  219. char *launch_type = slurm_get_launch_type();
  220. if (!strcmp(launch_type, "launch/slurm")) {
  221. error("--launch-cmd option is invalid with %s",
  222. launch_type);
  223. xfree(launch_type);
  224. exit(1);
  225. }
  226. xfree(launch_type);
  227. launch_g_create_job_step(NULL, 0, NULL, NULL);
  228. exit(0);
  229. }
  230. return 1;
  231. }
  232. /*
  233. * If the node list supplied is a file name, translate that into
  234. * a list of nodes, we orphan the data pointed to
  235. * RET true if the node list is a valid one
  236. */
  237. static bool _valid_node_list(char **node_list_pptr)
  238. {
  239. int count = NO_VAL;
  240. /* If we are using Arbitrary and we specified the number of
  241. procs to use then we need exactly this many since we are
  242. saying, lay it out this way! Same for max and min nodes.
  243. Other than that just read in as many in the hostfile */
  244. if(opt.ntasks_set)
  245. count = opt.ntasks;
  246. else if(opt.nodes_set) {
  247. if(opt.max_nodes)
  248. count = opt.max_nodes;
  249. else if(opt.min_nodes)
  250. count = opt.min_nodes;
  251. }
  252. return verify_node_list(node_list_pptr, opt.distribution, count);
  253. }
  254. /*
  255. * print error message to stderr with opt.progname prepended
  256. */
  257. #undef USE_ARGERROR
  258. #if USE_ARGERROR
  259. static void argerror(const char *msg, ...)
  260. __attribute__ ((format (printf, 1, 2)));
  261. static void argerror(const char *msg, ...)
  262. {
  263. va_list ap;
  264. char buf[256];
  265. va_start(ap, msg);
  266. vsnprintf(buf, sizeof(buf), msg, ap);
  267. fprintf(stderr, "%s: %s\n",
  268. opt.progname ? opt.progname : "srun", buf);
  269. va_end(ap);
  270. }
  271. #else
  272. # define argerror error
  273. #endif /* USE_ARGERROR */
  274. /*
  275. * _opt_default(): used by initialize_and_process_args to set defaults
  276. */
  277. static void _opt_default()
  278. {
  279. char buf[MAXPATHLEN + 1];
  280. int i;
  281. uid_t uid = getuid();
  282. opt.user = uid_to_string(uid);
  283. if (strcmp(opt.user, "nobody") == 0)
  284. fatal("Invalid user id: %u", uid);
  285. opt.uid = uid;
  286. opt.gid = getgid();
  287. if ((getcwd(buf, MAXPATHLEN)) == NULL) {
  288. error("getcwd failed: %m");
  289. exit(error_exit);
  290. }
  291. opt.cwd = xstrdup(buf);
  292. opt.cwd_set = false;
  293. opt.progname = NULL;
  294. opt.ntasks = 1;
  295. opt.ntasks_set = false;
  296. opt.cpus_per_task = 0;
  297. opt.cpus_set = false;
  298. opt.min_nodes = 1;
  299. opt.max_nodes = 0;
  300. opt.sockets_per_node = NO_VAL; /* requested sockets */
  301. opt.cores_per_socket = NO_VAL; /* requested cores */
  302. opt.threads_per_core = NO_VAL; /* requested threads */
  303. opt.ntasks_per_node = NO_VAL; /* ntask max limits */
  304. opt.ntasks_per_socket = NO_VAL;
  305. opt.ntasks_per_core = NO_VAL;
  306. opt.nodes_set = false;
  307. opt.nodes_set_env = false;
  308. opt.nodes_set_opt = false;
  309. opt.cpu_bind_type = 0;
  310. opt.cpu_bind = NULL;
  311. opt.mem_bind_type = 0;
  312. opt.mem_bind = NULL;
  313. opt.time_limit = NO_VAL;
  314. opt.time_limit_str = NULL;
  315. opt.time_min = NO_VAL;
  316. opt.time_min_str = NULL;
  317. opt.ckpt_interval = 0;
  318. opt.ckpt_interval_str = NULL;
  319. opt.ckpt_dir = NULL;
  320. opt.restart_dir = NULL;
  321. opt.partition = NULL;
  322. opt.max_threads = MAX_THREADS;
  323. pmi_server_max_threads(opt.max_threads);
  324. opt.relative = NO_VAL;
  325. opt.relative_set = false;
  326. opt.resv_port_cnt = NO_VAL;
  327. opt.cmd_name = NULL;
  328. opt.job_name = NULL;
  329. opt.job_name_set_cmd = false;
  330. opt.job_name_set_env = false;
  331. opt.jobid = NO_VAL;
  332. opt.jobid_set = false;
  333. opt.dependency = NULL;
  334. opt.account = NULL;
  335. opt.comment = NULL;
  336. opt.qos = NULL;
  337. opt.distribution = SLURM_DIST_UNKNOWN;
  338. opt.plane_size = NO_VAL;
  339. opt.ofname = NULL;
  340. opt.ifname = NULL;
  341. opt.efname = NULL;
  342. opt.labelio = false;
  343. opt.unbuffered = false;
  344. opt.overcommit = false;
  345. opt.shared = (uint16_t)NO_VAL;
  346. opt.exclusive = false;
  347. opt.no_kill = false;
  348. opt.kill_bad_exit = NO_VAL;
  349. opt.immediate = 0;
  350. opt.join = false;
  351. opt.max_wait = slurm_get_wait_time();
  352. opt.quit_on_intr = false;
  353. opt.disable_status = false;
  354. opt.test_only = false;
  355. opt.preserve_env = false;
  356. opt.quiet = 0;
  357. _verbose = 0;
  358. opt.slurmd_debug = LOG_LEVEL_QUIET;
  359. opt.warn_signal = 0;
  360. opt.warn_time = 0;
  361. opt.pn_min_cpus = NO_VAL;
  362. opt.pn_min_memory = NO_VAL;
  363. opt.mem_per_cpu = NO_VAL;
  364. opt.pn_min_tmp_disk= NO_VAL;
  365. opt.hold = false;
  366. opt.constraints = NULL;
  367. opt.gres = NULL;
  368. opt.contiguous = false;
  369. opt.hostfile = NULL;
  370. opt.nodelist = NULL;
  371. opt.exc_nodes = NULL;
  372. opt.max_launch_time = 120;/* 120 seconds to launch job */
  373. opt.max_exit_timeout= 60; /* Warn user 60 seconds after task exit */
  374. /* Default launch msg timeout */
  375. opt.msg_timeout = slurm_get_msg_timeout();
  376. for (i=0; i<HIGHEST_DIMENSIONS; i++) {
  377. opt.conn_type[i] = (uint16_t) NO_VAL;
  378. opt.geometry[i] = (uint16_t) NO_VAL;
  379. }
  380. opt.reboot = false;
  381. opt.no_rotate = false;
  382. opt.blrtsimage = NULL;
  383. opt.linuximage = NULL;
  384. opt.mloaderimage = NULL;
  385. opt.ramdiskimage = NULL;
  386. opt.euid = (uid_t) -1;
  387. opt.egid = (gid_t) -1;
  388. opt.propagate = NULL; /* propagate specific rlimits */
  389. opt.prolog = slurm_get_srun_prolog();
  390. opt.epilog = slurm_get_srun_epilog();
  391. opt.begin = (time_t)0;
  392. opt.task_prolog = NULL;
  393. opt.task_epilog = NULL;
  394. /*
  395. * Reset some default values if running under a parallel debugger
  396. */
  397. if ((opt.parallel_debug = _under_parallel_debugger())) {
  398. opt.max_launch_time = 120;
  399. opt.max_threads = 1;
  400. pmi_server_max_threads(opt.max_threads);
  401. opt.msg_timeout = 15;
  402. }
  403. opt.pty = false;
  404. opt.open_mode = 0;
  405. opt.acctg_freq = -1;
  406. opt.cpu_freq = NO_VAL;
  407. opt.reservation = NULL;
  408. opt.wckey = NULL;
  409. opt.req_switch = -1;
  410. opt.wait4switch = -1;
  411. opt.launcher_opts = NULL;
  412. opt.launch_cmd = false;
  413. }
  414. /*---[ env var processing ]-----------------------------------------------*/
/*
 * This tries to use a scheme similar to popt.
 *
 * To add a new env var (to be processed like an option):
 *
 * Add a new entry to env_vars[].  If the option is a simple int or
 * string, it is enough to give the entry type OPT_INT or OPT_STRING and
 * a pointer to the option to set.  Otherwise, define a new OPT_ type and
 * handle it in the switch on "type" in _process_env_var().
 */
/* One row of the env_vars[] table: how to apply a single environment variable. */
struct env_vars {
const char *var; /* variable name looked up with getenv() */
int type; /* OPT_* selector switched on by _process_env_var() */
void *arg; /* destination: char ** for OPT_STRING, int * for OPT_INT,
 * NULL for types that write fixed opt fields directly */
void *set_flag; /* optional bool * set to true when the variable is
 * present; NULL when no flag is tracked */
};
/*
 * Table of environment variables honoured by srun, terminated by an entry
 * whose var is NULL.  _opt_env() walks it from top to bottom, so when two
 * entries share a destination (e.g. SLURM_REMOTE_CWD and SLURM_WORKING_DIR
 * both target opt.cwd) the later entry wins if both variables are set.
 *
 * NOTE(review): entries typed OPT_NCORES, OPT_NSOCKETS, OPT_NTHREADS and
 * OPT_MULTI have no case in _process_env_var() and are therefore ignored.
 *
 * NOTE(review): OPT_INT values are stored through an int pointer.
 * opt.disable_status, opt.labelio and opt.unbuffered are initialised with
 * `false` in _opt_default() -- confirm in opt.h that they are int-sized,
 * otherwise the store overruns the field.
 */
env_vars_t env_vars[] = {
{"SLURMD_DEBUG", OPT_INT, &opt.slurmd_debug, NULL },
{"SLURM_ACCOUNT", OPT_STRING, &opt.account, NULL },
{"SLURM_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL },
{"SLURM_BLRTS_IMAGE", OPT_STRING, &opt.blrtsimage, NULL },
{"SLURM_CHECKPOINT", OPT_STRING, &opt.ckpt_interval_str, NULL },
{"SLURM_CHECKPOINT_DIR",OPT_STRING, &opt.ckpt_dir, NULL },
{"SLURM_CNLOAD_IMAGE", OPT_STRING, &opt.linuximage, NULL },
{"SLURM_CONN_TYPE", OPT_CONN_TYPE, NULL, NULL },
{"SLURM_CPUS_PER_TASK", OPT_INT, &opt.cpus_per_task, &opt.cpus_set },
{"SLURM_CPU_BIND", OPT_CPU_BIND, NULL, NULL },
{"SLURM_CPU_FREQ_REQ", OPT_CPU_FREQ, NULL, NULL },
{"SLURM_DEPENDENCY", OPT_STRING, &opt.dependency, NULL },
{"SLURM_DISABLE_STATUS",OPT_INT, &opt.disable_status,NULL },
{"SLURM_DISTRIBUTION", OPT_DISTRIB, NULL, NULL },
{"SLURM_EPILOG", OPT_STRING, &opt.epilog, NULL },
{"SLURM_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL },
{"SLURM_GEOMETRY", OPT_GEOMETRY, NULL, NULL },
{"SLURM_GRES", OPT_STRING, &opt.gres, NULL },
{"SLURM_IMMEDIATE", OPT_IMMEDIATE, NULL, NULL },
{"SLURM_IOLOAD_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL },
/* SLURM_JOBID was used in slurm version 1.3 and below; it is kept only
 * for backward compatibility and sets the same field as SLURM_JOB_ID */
{"SLURM_JOBID", OPT_INT, &opt.jobid, NULL },
{"SLURM_JOB_ID", OPT_INT, &opt.jobid, NULL },
{"SLURM_JOB_NAME", OPT_STRING, &opt.job_name, &opt.job_name_set_env},
{"SLURM_KILL_BAD_EXIT", OPT_INT, &opt.kill_bad_exit, NULL },
{"SLURM_LABELIO", OPT_INT, &opt.labelio, NULL },
{"SLURM_LINUX_IMAGE", OPT_STRING, &opt.linuximage, NULL },
{"SLURM_MEM_BIND", OPT_MEM_BIND, NULL, NULL },
{"SLURM_MEM_PER_CPU", OPT_INT, &opt.mem_per_cpu, NULL },
{"SLURM_MEM_PER_NODE", OPT_INT, &opt.pn_min_memory, NULL },
{"SLURM_MLOADER_IMAGE", OPT_STRING, &opt.mloaderimage, NULL },
{"SLURM_MPI_TYPE", OPT_MPI, NULL, NULL },
{"SLURM_NCORES_PER_SOCKET",OPT_NCORES, NULL, NULL },
{"SLURM_NETWORK", OPT_STRING, &opt.network, NULL },
{"SLURM_NNODES", OPT_NODES, NULL, NULL },
{"SLURM_NODELIST", OPT_STRING, &opt.alloc_nodelist,NULL },
{"SLURM_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL },
{"SLURM_NTASKS", OPT_INT, &opt.ntasks, &opt.ntasks_set },
{"SLURM_NPROCS", OPT_INT, &opt.ntasks, &opt.ntasks_set },
{"SLURM_NSOCKETS_PER_NODE",OPT_NSOCKETS,NULL, NULL },
{"SLURM_NTASKS_PER_NODE", OPT_INT, &opt.ntasks_per_node, NULL },
{"SLURM_NTHREADS_PER_CORE",OPT_NTHREADS,NULL, NULL },
{"SLURM_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL },
{"SLURM_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL },
{"SLURM_PARTITION", OPT_STRING, &opt.partition, NULL },
{"SLURM_PROLOG", OPT_STRING, &opt.prolog, NULL },
{"SLURM_QOS", OPT_STRING, &opt.qos, NULL },
{"SLURM_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL },
{"SLURM_REMOTE_CWD", OPT_STRING, &opt.cwd, NULL },
{"SLURM_RESTART_DIR", OPT_STRING, &opt.restart_dir , NULL },
{"SLURM_RESV_PORTS", OPT_RESV_PORTS, NULL, NULL },
{"SLURM_SIGNAL", OPT_SIGNAL, NULL, NULL },
{"SLURM_SRUN_MULTI", OPT_MULTI, NULL, NULL },
{"SLURM_STDERRMODE", OPT_STRING, &opt.efname, NULL },
{"SLURM_STDINMODE", OPT_STRING, &opt.ifname, NULL },
{"SLURM_STDOUTMODE", OPT_STRING, &opt.ofname, NULL },
{"SLURM_TASK_EPILOG", OPT_STRING, &opt.task_epilog, NULL },
{"SLURM_TASK_PROLOG", OPT_STRING, &opt.task_prolog, NULL },
{"SLURM_THREADS", OPT_INT, &opt.max_threads, NULL },
{"SLURM_TIMELIMIT", OPT_STRING, &opt.time_limit_str,NULL },
{"SLURM_UNBUFFEREDIO", OPT_INT, &opt.unbuffered, NULL },
{"SLURM_WAIT", OPT_INT, &opt.max_wait, NULL },
{"SLURM_WCKEY", OPT_STRING, &opt.wckey, NULL },
{"SLURM_WORKING_DIR", OPT_STRING, &opt.cwd, &opt.cwd_set },
{"SLURM_REQ_SWITCH", OPT_INT, &opt.req_switch, NULL },
{"SLURM_WAIT4SWITCH", OPT_TIME_VAL, NULL, NULL },
{NULL, 0, NULL, NULL}
};
  499. /*
  500. * _opt_env(): used by initialize_and_process_args to set options via
  501. * environment variables. See comments above for how to
  502. * extend srun to process different vars
  503. */
  504. static void _opt_env()
  505. {
  506. char *val = NULL;
  507. env_vars_t *e = env_vars;
  508. while (e->var) {
  509. if ((val = getenv(e->var)) != NULL)
  510. _process_env_var(e, val);
  511. e++;
  512. }
  513. }
  514. static void
  515. _process_env_var(env_vars_t *e, const char *val)
  516. {
  517. char *end = NULL;
  518. task_dist_states_t dt;
  519. debug2("now processing env var %s=%s", e->var, val);
  520. if (e->set_flag) {
  521. *((bool *) e->set_flag) = true;
  522. }
  523. switch (e->type) {
  524. case OPT_STRING:
  525. *((char **) e->arg) = xstrdup(val);
  526. break;
  527. case OPT_INT:
  528. if (val != NULL) {
  529. *((int *) e->arg) = (int) strtol(val, &end, 10);
  530. if (!(end && *end == '\0')) {
  531. error("%s=%s invalid. ignoring...",
  532. e->var, val);
  533. }
  534. }
  535. break;
  536. case OPT_DISTRIB:
  537. if (strcmp(val, "unknown") == 0)
  538. break; /* ignore it, passed from salloc */
  539. dt = verify_dist_type(val, &opt.plane_size);
  540. if (dt == SLURM_DIST_UNKNOWN) {
  541. error("\"%s=%s\" -- invalid distribution type. "
  542. "ignoring...", e->var, val);
  543. } else
  544. opt.distribution = dt;
  545. break;
  546. case OPT_CPU_BIND:
  547. if (slurm_verify_cpu_bind(val, &opt.cpu_bind,
  548. &opt.cpu_bind_type))
  549. exit(error_exit);
  550. break;
  551. case OPT_CPU_FREQ:
  552. if (cpu_freq_verify_param(val, &opt.cpu_freq))
  553. error("Invalid --cpu-freq argument: %s. Ignored", val);
  554. break;
  555. case OPT_MEM_BIND:
  556. if (slurm_verify_mem_bind(val, &opt.mem_bind,
  557. &opt.mem_bind_type))
  558. exit(error_exit);
  559. break;
  560. case OPT_NODES:
  561. opt.nodes_set_env = get_resource_arg_range( val ,"OPT_NODES",
  562. &opt.min_nodes,
  563. &opt.max_nodes,
  564. false);
  565. if (opt.nodes_set_env == false) {
  566. error("\"%s=%s\" -- invalid node count. ignoring...",
  567. e->var, val);
  568. } else
  569. opt.nodes_set = opt.nodes_set_env;
  570. break;
  571. case OPT_OVERCOMMIT:
  572. opt.overcommit = true;
  573. break;
  574. case OPT_EXCLUSIVE:
  575. opt.exclusive = true;
  576. opt.shared = 0;
  577. break;
  578. case OPT_RESV_PORTS:
  579. if (val)
  580. opt.resv_port_cnt = strtol(val, NULL, 10);
  581. else
  582. opt.resv_port_cnt = 0;
  583. break;
  584. case OPT_OPEN_MODE:
  585. if ((val[0] == 'a') || (val[0] == 'A'))
  586. opt.open_mode = OPEN_MODE_APPEND;
  587. else if ((val[0] == 't') || (val[0] == 'T'))
  588. opt.open_mode = OPEN_MODE_TRUNCATE;
  589. else
  590. error("Invalid SLURM_OPEN_MODE: %s. Ignored", val);
  591. break;
  592. case OPT_CONN_TYPE:
  593. verify_conn_type(val, opt.conn_type);
  594. break;
  595. case OPT_NO_ROTATE:
  596. opt.no_rotate = true;
  597. break;
  598. case OPT_GEOMETRY:
  599. if (verify_geometry(val, opt.geometry)) {
  600. error("\"%s=%s\" -- invalid geometry, ignoring...",
  601. e->var, val);
  602. }
  603. break;
  604. case OPT_IMMEDIATE:
  605. if (val)
  606. opt.immediate = strtol(val, NULL, 10);
  607. else
  608. opt.immediate = DEFAULT_IMMEDIATE;
  609. break;
  610. case OPT_MPI:
  611. xfree(mpi_type);
  612. mpi_type = xstrdup(val);
  613. if (mpi_hook_client_init((char *)val) == SLURM_ERROR) {
  614. error("\"%s=%s\" -- invalid MPI type, "
  615. "--mpi=list for acceptable types.",
  616. e->var, val);
  617. exit(error_exit);
  618. }
  619. mpi_initialized = true;
  620. break;
  621. case OPT_SIGNAL:
  622. if (get_signal_opts((char *)val, &opt.warn_signal,
  623. &opt.warn_time)) {
  624. error("Invalid signal specification: %s", val);
  625. exit(error_exit);
  626. }
  627. break;
  628. case OPT_TIME_VAL:
  629. opt.wait4switch = time_str2secs(val);
  630. break;
  631. default:
  632. /* do nothing */
  633. break;
  634. }
  635. }
  636. /*
  637. * Get a decimal integer from arg.
  638. *
  639. * Returns the integer on success, exits program on failure.
  640. *
  641. */
  642. static int
  643. _get_int(const char *arg, const char *what, bool positive)
  644. {
  645. char *p;
  646. long int result = strtol(arg, &p, 10);
  647. if ((*p != '\0') || (result < 0L)
  648. || (positive && (result <= 0L))) {
  649. error ("Invalid numeric value \"%s\" for %s.", arg, what);
  650. exit(error_exit);
  651. } else if (result > INT_MAX) {
  652. error ("Numeric argument (%ld) to big for %s.", result, what);
  653. } else if (result < INT_MIN) {
  654. error ("Numeric argument %ld to small for %s.", result, what);
  655. }
  656. return (int) result;
  657. }
  658. static void set_options(const int argc, char **argv)
  659. {
  660. int opt_char, option_index = 0, max_val = 0, tmp_int;
  661. struct utsname name;
  662. static struct option long_options[] = {
  663. {"account", required_argument, 0, 'A'},
  664. {"extra-node-info", required_argument, 0, 'B'},
  665. {"cpus-per-task", required_argument, 0, 'c'},
  666. {"constraint", required_argument, 0, 'C'},
  667. {"dependency", required_argument, 0, 'd'},
  668. {"chdir", required_argument, 0, 'D'},
  669. {"error", required_argument, 0, 'e'},
  670. {"preserve-env", no_argument, 0, 'E'},
  671. {"preserve-slurm-env", no_argument, 0, 'E'},
  672. {"geometry", required_argument, 0, 'g'},
  673. {"hold", no_argument, 0, 'H'},
  674. {"input", required_argument, 0, 'i'},
  675. {"immediate", optional_argument, 0, 'I'},
  676. {"join", no_argument, 0, 'j'},
  677. {"job-name", required_argument, 0, 'J'},
  678. {"no-kill", no_argument, 0, 'k'},
  679. {"kill-on-bad-exit", optional_argument, 0, 'K'},
  680. {"label", no_argument, 0, 'l'},
  681. {"licenses", required_argument, 0, 'L'},
  682. {"distribution", required_argument, 0, 'm'},
  683. {"ntasks", required_argument, 0, 'n'},
  684. {"nodes", required_argument, 0, 'N'},
  685. {"output", required_argument, 0, 'o'},
  686. {"overcommit", no_argument, 0, 'O'},
  687. {"partition", required_argument, 0, 'p'},
  688. {"quit-on-interrupt", no_argument, 0, 'q'},
  689. {"quiet", no_argument, 0, 'Q'},
  690. {"relative", required_argument, 0, 'r'},
  691. {"no-rotate", no_argument, 0, 'R'},
  692. {"share", no_argument, 0, 's'},
  693. {"time", required_argument, 0, 't'},
  694. {"threads", required_argument, 0, 'T'},
  695. {"unbuffered", no_argument, 0, 'u'},
  696. {"verbose", no_argument, 0, 'v'},
  697. {"version", no_argument, 0, 'V'},
  698. {"nodelist", required_argument, 0, 'w'},
  699. {"wait", required_argument, 0, 'W'},
  700. {"exclude", required_argument, 0, 'x'},
  701. {"disable-status", no_argument, 0, 'X'},
  702. {"no-allocate", no_argument, 0, 'Z'},
  703. {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ},
  704. {"alps", required_argument, 0, LONG_OPT_ALPS},
  705. {"begin", required_argument, 0, LONG_OPT_BEGIN},
  706. {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE},
  707. {"checkpoint", required_argument, 0, LONG_OPT_CHECKPOINT},
  708. {"checkpoint-dir", required_argument, 0, LONG_OPT_CHECKPOINT_DIR},
  709. {"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE},
  710. {"comment", required_argument, 0, LONG_OPT_COMMENT},
  711. {"conn-type", required_argument, 0, LONG_OPT_CONNTYPE},
  712. {"contiguous", no_argument, 0, LONG_OPT_CONT},
  713. {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET},
  714. {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND},
  715. {"cpu-freq", required_argument, 0, LONG_OPT_CPU_FREQ},
  716. {"debugger-test", no_argument, 0, LONG_OPT_DEBUG_TS},
  717. {"epilog", required_argument, 0, LONG_OPT_EPILOG},
  718. {"exclusive", no_argument, 0, LONG_OPT_EXCLUSIVE},
  719. {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV},
  720. {"gid", required_argument, 0, LONG_OPT_GID},
  721. {"gres", required_argument, 0, LONG_OPT_GRES},
  722. {"help", no_argument, 0, LONG_OPT_HELP},
  723. {"hint", required_argument, 0, LONG_OPT_HINT},
  724. {"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE},
  725. {"jobid", required_argument, 0, LONG_OPT_JOBID},
  726. {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE},
  727. {"launch-cmd", no_argument, 0, LONG_OPT_LAUNCH_CMD},
  728. {"launcher-opts", required_argument, 0, LONG_OPT_LAUNCHER_OPTS},
  729. {"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE},
  730. {"mail-user", required_argument, 0, LONG_OPT_MAIL_USER},
  731. {"max-exit-timeout", required_argument, 0, LONG_OPT_XTO},
  732. {"max-launch-time", required_argument, 0, LONG_OPT_LAUNCH},
  733. {"mem", required_argument, 0, LONG_OPT_MEM},
  734. {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU},
  735. {"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND},
  736. {"mincores", required_argument, 0, LONG_OPT_MINCORES},
  737. {"mincpus", required_argument, 0, LONG_OPT_MINCPUS},
  738. {"minsockets", required_argument, 0, LONG_OPT_MINSOCKETS},
  739. {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS},
  740. {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE},
  741. {"mpi", required_argument, 0, LONG_OPT_MPI},
  742. {"msg-timeout", required_argument, 0, LONG_OPT_TIMEO},
  743. {"multi-prog", no_argument, 0, LONG_OPT_MULTI},
  744. {"network", required_argument, 0, LONG_OPT_NETWORK},
  745. {"nice", optional_argument, 0, LONG_OPT_NICE},
  746. {"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE},
  747. {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE},
  748. {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET},
  749. {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE},
  750. {"prolog", required_argument, 0, LONG_OPT_PROLOG},
  751. {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE},
  752. {"pty", no_argument, 0, LONG_OPT_PTY},
  753. {"qos", required_argument, 0, LONG_OPT_QOS},
  754. {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE},
  755. {"reboot", no_argument, 0, LONG_OPT_REBOOT},
  756. {"reservation", required_argument, 0, LONG_OPT_RESERVATION},
  757. {"restart-dir", required_argument, 0, LONG_OPT_RESTART_DIR},
  758. {"resv-ports", optional_argument, 0, LONG_OPT_RESV_PORTS},
  759. {"runjob-opts", required_argument, 0, LONG_OPT_LAUNCHER_OPTS},
  760. {"signal", required_argument, 0, LONG_OPT_SIGNAL},
  761. {"slurmd-debug", required_argument, 0, LONG_OPT_DEBUG_SLURMD},
  762. {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
  763. {"switches", required_argument, 0, LONG_OPT_REQ_SWITCH},
  764. {"task-epilog", required_argument, 0, LONG_OPT_TASK_EPILOG},
  765. {"task-prolog", required_argument, 0, LONG_OPT_TASK_PROLOG},
  766. {"tasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE},
  767. {"test-only", no_argument, 0, LONG_OPT_TEST_ONLY},
  768. {"time-min", required_argument, 0, LONG_OPT_TIME_MIN},
  769. {"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE},
  770. {"tmp", required_argument, 0, LONG_OPT_TMP},
  771. {"uid", required_argument, 0, LONG_OPT_UID},
  772. {"usage", no_argument, 0, LONG_OPT_USAGE},
  773. {"wckey", required_argument, 0, LONG_OPT_WCKEY},
  774. {NULL, 0, 0, 0}
  775. };
  776. char *opt_string = "+A:B:c:C:d:D:e:Eg:hHi:I::jJ:kK::lL:m:n:N:"
  777. "o:Op:P:qQr:Rst:T:uU:vVw:W:x:XZ";
  778. char *pos_delimit;
  779. #ifdef HAVE_PTY_H
  780. char *tmp_str;
  781. #endif
  782. struct option *optz = spank_option_table_create (long_options);
  783. if (!optz) {
  784. error("Unable to create option table");
  785. exit(error_exit);
  786. }
  787. if (opt.progname == NULL)
  788. opt.progname = xbasename(argv[0]);
  789. else
  790. error("opt.progname is already set.");
  791. optind = 0;
  792. while((opt_char = getopt_long(argc, argv, opt_string,
  793. optz, &option_index)) != -1) {
  794. switch (opt_char) {
  795. case (int)'?':
  796. fprintf(stderr,
  797. "Try \"srun --help\" for more information\n");
  798. exit(error_exit);
  799. break;
  800. case (int)'A':
  801. case (int)'U': /* backwards compatibility */
  802. xfree(opt.account);
  803. opt.account = xstrdup(optarg);
  804. break;
  805. case (int)'B':
  806. opt.extra_set = verify_socket_core_thread_count(
  807. optarg,
  808. &opt.sockets_per_node,
  809. &opt.cores_per_socket,
  810. &opt.threads_per_core,
  811. &opt.cpu_bind_type);
  812. if (opt.extra_set == false) {
  813. error("invalid resource allocation -B `%s'",
  814. optarg);
  815. exit(error_exit);
  816. }
  817. break;
  818. case (int)'c':
  819. tmp_int = _get_int(optarg, "cpus-per-task", false);
  820. if (opt.cpus_set && (tmp_int > opt.cpus_per_task)) {
  821. info("Job step's --cpus-per-task value exceeds"
  822. " that of job (%d > %d). Job step may "
  823. "never run.", tmp_int, opt.cpus_per_task);
  824. }
  825. opt.cpus_set = true;
  826. opt.cpus_per_task = tmp_int;
  827. break;
  828. case (int)'C':
  829. xfree(opt.constraints);
  830. opt.constraints = xstrdup(optarg);
  831. break;
  832. case (int)'d':
  833. xfree(opt.dependency);
  834. opt.dependency = xstrdup(optarg);
  835. break;
  836. case (int)'D':
  837. opt.cwd_set = true;
  838. xfree(opt.cwd);
  839. opt.cwd = xstrdup(optarg);
  840. break;
  841. case (int)'e':
  842. if (opt.pty) {
  843. fatal("--error incompatible with --pty "
  844. "option");
  845. exit(error_exit);
  846. }
  847. xfree(opt.efname);
  848. if (strcasecmp(optarg, "none") == 0)
  849. opt.efname = xstrdup("/dev/null");
  850. else
  851. opt.efname = xstrdup(optarg);
  852. break;
  853. case (int)'E':
  854. opt.preserve_env = true;
  855. break;
  856. case (int)'g':
  857. if (verify_geometry(optarg, opt.geometry))
  858. exit(error_exit);
  859. break;
  860. case (int)'H':
  861. opt.hold = true;
  862. break;
  863. case (int)'i':
  864. if (opt.pty) {
  865. fatal("--input incompatible with "
  866. "--pty option");
  867. exit(error_exit);
  868. }
  869. xfree(opt.ifname);
  870. if (strcasecmp(optarg, "none") == 0)
  871. opt.ifname = xstrdup("/dev/null");
  872. else
  873. opt.ifname = xstrdup(optarg);
  874. break;
  875. case (int)'I':
  876. if (optarg)
  877. opt.immediate = strtol(optarg, NULL, 10);
  878. else
  879. opt.immediate = DEFAULT_IMMEDIATE;
  880. break;
  881. case (int)'j':
  882. opt.join = true;
  883. break;
  884. case (int)'J':
  885. opt.job_name_set_cmd = true;
  886. xfree(opt.job_name);
  887. opt.job_name = xstrdup(optarg);
  888. break;
  889. case (int)'k':
  890. opt.no_kill = true;
  891. break;
  892. case (int)'K':
  893. if (optarg)
  894. opt.kill_bad_exit = strtol(optarg, NULL, 10);
  895. else
  896. opt.kill_bad_exit = 1;
  897. break;
  898. case (int)'l':
  899. opt.labelio = true;
  900. break;
  901. case 'L':
  902. xfree(opt.licenses);
  903. opt.licenses = xstrdup(optarg);
  904. break;
  905. case (int)'m':
  906. opt.distribution = verify_dist_type(optarg,
  907. &opt.plane_size);
  908. if (opt.distribution == SLURM_DIST_UNKNOWN) {
  909. error("distribution type `%s' "
  910. "is not recognized", optarg);
  911. exit(error_exit);
  912. }
  913. break;
  914. case (int)'n':
  915. opt.ntasks_set = true;
  916. opt.ntasks =
  917. _get_int(optarg, "number of tasks", true);
  918. break;
  919. case (int)'N':
  920. opt.nodes_set_opt =
  921. get_resource_arg_range( optarg,
  922. "requested node count",
  923. &opt.min_nodes,
  924. &opt.max_nodes, true );
  925. if (opt.nodes_set_opt == false) {
  926. error("invalid resource allocation -N `%s'",
  927. optarg);
  928. exit(error_exit);
  929. } else
  930. opt.nodes_set = opt.nodes_set_opt;
  931. break;
  932. case (int)'o':
  933. if (opt.pty) {
  934. error("--output incompatible with --pty "
  935. "option");
  936. exit(error_exit);
  937. }
  938. xfree(opt.ofname);
  939. if (strcasecmp(optarg, "none") == 0)
  940. opt.ofname = xstrdup("/dev/null");
  941. else
  942. opt.ofname = xstrdup(optarg);
  943. break;
  944. case (int)'O':
  945. opt.overcommit = true;
  946. break;
  947. case (int)'p':
  948. xfree(opt.partition);
  949. opt.partition = xstrdup(optarg);
  950. break;
  951. case (int)'P':
  952. verbose("-P option is deprecated, use -d instead");
  953. xfree(opt.dependency);
  954. opt.dependency = xstrdup(optarg);
  955. break;
  956. case (int)'q':
  957. opt.quit_on_intr = true;
  958. break;
  959. case (int) 'Q':
  960. opt.quiet++;
  961. break;
  962. case (int)'r':
  963. opt.relative = _get_int(optarg, "relative", false);
  964. opt.relative_set = true;
  965. break;
  966. case (int)'R':
  967. opt.no_rotate = true;
  968. break;
  969. case (int)'s':
  970. opt.shared = 1;
  971. break;
  972. case (int)'t':
  973. xfree(opt.time_limit_str);
  974. opt.time_limit_str = xstrdup(optarg);
  975. break;
  976. case (int)'T':
  977. opt.max_threads =
  978. _get_int(optarg, "max_threads", true);
  979. pmi_server_max_threads(opt.max_threads);
  980. break;
  981. case (int)'u':
  982. opt.unbuffered = true;
  983. break;
  984. case (int)'v':
  985. _verbose++;
  986. break;
  987. case (int)'V':
  988. print_slurm_version();
  989. exit(0);
  990. break;
  991. case (int)'w':
  992. xfree(opt.nodelist);
  993. opt.nodelist = xstrdup(optarg);
  994. break;
  995. case (int)'W':
  996. opt.max_wait = _get_int(optarg, "wait", false);
  997. break;
  998. case (int)'x':
  999. xfree(opt.exc_nodes);
  1000. opt.exc_nodes = xstrdup(optarg);
  1001. if (!_valid_node_list(&opt.exc_nodes))
  1002. exit(error_exit);
  1003. break;
  1004. case (int)'X':
  1005. opt.disable_status = true;
  1006. break;
  1007. case (int)'Z':
  1008. opt.no_alloc = true;
  1009. uname(&name);
  1010. if (strcasecmp(name.sysname, "AIX") == 0)
  1011. opt.network = xstrdup("ip");
  1012. break;
  1013. case LONG_OPT_CONT:
  1014. opt.contiguous = true;
  1015. break;
  1016. case LONG_OPT_EXCLUSIVE:
  1017. opt.exclusive = true;
  1018. opt.shared = 0;
  1019. break;
  1020. case LONG_OPT_CPU_BIND:
  1021. if (slurm_verify_cpu_bind(optarg, &opt.cpu_bind,
  1022. &opt.cpu_bind_type))
  1023. exit(error_exit);
  1024. break;
  1025. case LONG_OPT_LAUNCH_CMD:
  1026. opt.launch_cmd = true;
  1027. break;
  1028. case LONG_OPT_MEM_BIND:
  1029. if (slurm_verify_mem_bind(optarg, &opt.mem_bind,
  1030. &opt.mem_bind_type))
  1031. exit(error_exit);
  1032. break;
  1033. case LONG_OPT_MINCPUS:
  1034. opt.pn_min_cpus = _get_int(optarg, "mincpus", true);
  1035. break;
  1036. case LONG_OPT_MINCORES:
  1037. verbose("mincores option has been deprecated, use "
  1038. "cores-per-socket");
  1039. opt.cores_per_socket = _get_int(optarg,
  1040. "mincores", true);
  1041. if (opt.cores_per_socket < 0) {
  1042. error("invalid mincores constraint %s",
  1043. optarg);
  1044. exit(error_exit);
  1045. }
  1046. break;
  1047. case LONG_OPT_MINSOCKETS:
  1048. verbose("minsockets option has been deprecated, use "
  1049. "sockets-per-node");
  1050. opt.sockets_per_node = _get_int(optarg,
  1051. "minsockets",true);
  1052. if (opt.sockets_per_node < 0) {
  1053. error("invalid minsockets constraint %s",
  1054. optarg);
  1055. exit(error_exit);
  1056. }
  1057. break;
  1058. case LONG_OPT_MINTHREADS:
  1059. verbose("minthreads option has been deprecated, use "
  1060. "threads-per-core");
  1061. opt.threads_per_core = _get_int(optarg,
  1062. "minthreads",true);
  1063. if (opt.threads_per_core < 0) {
  1064. error("invalid minthreads constraint %s",
  1065. optarg);
  1066. exit(error_exit);
  1067. }
  1068. break;
  1069. case LONG_OPT_MEM:
  1070. opt.pn_min_memory = (int) str_to_mbytes(optarg);
  1071. if (opt.pn_min_memory < 0) {
  1072. error("invalid memory constraint %s",
  1073. optarg);
  1074. exit(error_exit);
  1075. }
  1076. break;
  1077. case LONG_OPT_MEM_PER_CPU:
  1078. opt.mem_per_cpu = (int) str_to_mbytes(optarg);
  1079. if (opt.mem_per_cpu < 0) {
  1080. error("invalid memory constraint %s",
  1081. optarg);
  1082. exit(error_exit);
  1083. }
  1084. break;
  1085. case LONG_OPT_MPI:
  1086. xfree(mpi_type);
  1087. mpi_type = xstrdup(optarg);
  1088. if (mpi_hook_client_init((char *)optarg)
  1089. == SLURM_ERROR) {
  1090. error("\"--mpi=%s\" -- long invalid MPI type, "
  1091. "--mpi=list for acceptable types.",
  1092. optarg);
  1093. exit(error_exit);
  1094. }
  1095. mpi_initialized = true;
  1096. break;
  1097. case LONG_OPT_RESV_PORTS:
  1098. if (optarg)
  1099. opt.resv_port_cnt = strtol(optarg, NULL, 10);
  1100. else
  1101. opt.resv_port_cnt = 0;
  1102. break;
  1103. case LONG_OPT_TMP:
  1104. opt.pn_min_tmp_disk = str_to_mbytes(optarg);
  1105. if (opt.pn_min_tmp_disk < 0) {
  1106. error("invalid tmp value %s", optarg);
  1107. exit(error_exit);
  1108. }
  1109. break;
  1110. case LONG_OPT_JOBID:
  1111. opt.jobid = _get_int(optarg, "jobid", true);
  1112. opt.jobid_set = true;
  1113. break;
  1114. case LONG_OPT_TIMEO:
  1115. opt.msg_timeout =
  1116. _get_int(optarg, "msg-timeout", true);
  1117. break;
  1118. case LONG_OPT_LAUNCH:
  1119. opt.max_launch_time =
  1120. _get_int(optarg, "max-launch-time", true);
  1121. break;
  1122. case LONG_OPT_XTO:
  1123. opt.max_exit_timeout =
  1124. _get_int(optarg, "max-exit-timeout", true);
  1125. break;
  1126. case LONG_OPT_UID:
  1127. if (opt.euid != (uid_t) -1) {
  1128. error("duplicate --uid option");
  1129. exit(error_exit);
  1130. }
  1131. if (uid_from_string (optarg, &opt.euid) < 0) {
  1132. error("--uid=\"%s\" invalid", optarg);
  1133. exit(error_exit);
  1134. }
  1135. break;
  1136. case LONG_OPT_GID:
  1137. if (opt.egid != (gid_t) -1) {
  1138. error("duplicate --gid option");
  1139. exit(error_exit);
  1140. }
  1141. if (gid_from_string (optarg, &opt.egid) < 0) {
  1142. error("--gid=\"%s\" invalid", optarg);
  1143. exit(error_exit);
  1144. }
  1145. break;
  1146. case LONG_OPT_DEBUG_SLURMD:
  1147. opt.slurmd_debug =
  1148. _get_int(optarg, "slurmd-debug", false);
  1149. break;
  1150. case LONG_OPT_DEBUG_TS:
  1151. opt.debugger_test = true;
  1152. /* make other parameters look like debugger
  1153. * is really attached */
  1154. opt.parallel_debug = true;
  1155. opt.max_launch_time = 120;
  1156. opt.max_threads = 1;
  1157. pmi_server_max_threads(opt.max_threads);
  1158. opt.msg_timeout = 15;
  1159. break;
  1160. case 'h':
  1161. case LONG_OPT_HELP:
  1162. _help();
  1163. exit(0);
  1164. case LONG_OPT_USAGE:
  1165. _usage();
  1166. exit(0);
  1167. case LONG_OPT_CONNTYPE:
  1168. verify_conn_type(optarg, opt.conn_type);
  1169. break;
  1170. case LONG_OPT_TEST_ONLY:
  1171. opt.test_only = true;
  1172. break;
  1173. case LONG_OPT_NETWORK:
  1174. xfree(opt.network);
  1175. opt.network = xstrdup(optarg);
  1176. setenv("SLURM_NETWORK", opt.network, 1);
  1177. break;
  1178. case LONG_OPT_PROPAGATE:
  1179. xfree(opt.propagate);
  1180. if (optarg)
  1181. opt.propagate = xstrdup(optarg);
  1182. else
  1183. opt.propagate = xstrdup("ALL");
  1184. break;
  1185. case LONG_OPT_PROLOG:
  1186. xfree(opt.prolog);
  1187. opt.prolog = xstrdup(optarg);
  1188. break;
  1189. case LONG_OPT_EPILOG:
  1190. xfree(opt.epilog);
  1191. opt.epilog = xstrdup(optarg);
  1192. break;
  1193. case LONG_OPT_BEGIN:
  1194. opt.begin = parse_time(optarg, 0);
  1195. if (errno == ESLURM_INVALID_TIME_VALUE) {
  1196. error("Invalid time specification %s",
  1197. optarg);
  1198. exit(error_exit);
  1199. }
  1200. break;
  1201. case LONG_OPT_MAIL_TYPE:
  1202. opt.mail_type |= parse_mail_type(optarg);
  1203. if (opt.mail_type == 0) {
  1204. error("--mail-type=%s invalid", optarg);
  1205. exit(error_exit);
  1206. }
  1207. break;
  1208. case LONG_OPT_MAIL_USER:
  1209. xfree(opt.mail_user);
  1210. opt.mail_user = xstrdup(optarg);
  1211. break;
  1212. case LONG_OPT_TASK_PROLOG:
  1213. xfree(opt.task_prolog);
  1214. opt.task_prolog = xstrdup(optarg);
  1215. break;
  1216. case LONG_OPT_TASK_EPILOG:
  1217. xfree(opt.task_epilog);
  1218. opt.task_epilog = xstrdup(optarg);
  1219. break;
  1220. case LONG_OPT_NICE:
  1221. if (optarg)
  1222. opt.nice = strtol(optarg, NULL, 10);
  1223. else
  1224. opt.nice = 100;
  1225. if (abs(opt.nice) > NICE_OFFSET) {
  1226. error("Invalid nice value, must be between "
  1227. "-%d and %d", NICE_OFFSET, NICE_OFFSET);
  1228. exit(error_exit);
  1229. }
  1230. if (opt.nice < 0) {
  1231. uid_t my_uid = getuid();
  1232. if ((my_uid != 0) &&
  1233. (my_uid != slurm_get_slurm_user_id())) {
  1234. error("Nice value must be non-negative, "
  1235. "value ignored");
  1236. opt.nice = 0;
  1237. }
  1238. }
  1239. break;
  1240. case LONG_OPT_MULTI:
  1241. opt.multi_prog = true;
  1242. break;
  1243. case LONG_OPT_COMMENT:
  1244. xfree(opt.comment);
  1245. opt.comment = xstrdup(optarg);
  1246. break;
  1247. case LONG_OPT_QOS:
  1248. xfree(opt.qos);
  1249. opt.qos = xstrdup(optarg);
  1250. break;
  1251. case LONG_OPT_SOCKETSPERNODE:
  1252. max_val = 0;
  1253. get_resource_arg_range( optarg, "sockets-per-node",
  1254. &opt.sockets_per_node,
  1255. &max_val, true );
  1256. if ((opt.sockets_per_node == 1) &&
  1257. (max_val == INT_MAX))
  1258. opt.sockets_per_node = NO_VAL;
  1259. break;
  1260. case LONG_OPT_CORESPERSOCKET:
  1261. max_val = 0;
  1262. get_resource_arg_range( optarg, "cores-per-socket",
  1263. &opt.cores_per_socket,
  1264. &max_val, true );
  1265. if ((opt.cores_per_socket == 1) &&
  1266. (max_val == INT_MAX))
  1267. opt.cores_per_socket = NO_VAL;
  1268. break;
  1269. case LONG_OPT_THREADSPERCORE:
  1270. max_val = 0;
  1271. get_resource_arg_range( optarg, "threads-per-core",
  1272. &opt.threads_per_core,
  1273. &max_val, true );
  1274. if ((opt.threads_per_core == 1) &&
  1275. (max_val == INT_MAX))
  1276. opt.threads_per_core = NO_VAL;
  1277. break;
  1278. case LONG_OPT_NTASKSPERNODE:
  1279. opt.ntasks_per_node = _get_int(optarg, "ntasks-per-node",
  1280. true);
  1281. break;
  1282. case LONG_OPT_NTASKSPERSOCKET:
  1283. opt.ntasks_per_socket = _get_int(optarg,
  1284. "ntasks-per-socket", true);
  1285. break;
  1286. case LONG_OPT_NTASKSPERCORE:
  1287. opt.ntasks_per_core = _get_int(optarg, "ntasks-per-core",
  1288. true);
  1289. break;
  1290. case LONG_OPT_HINT:
  1291. /* Keep after other options filled in */
  1292. if (verify_hint(optarg,
  1293. &opt.sockets_per_node,
  1294. &opt.cores_per_socket,
  1295. &opt.threads_per_core,
  1296. &opt.ntasks_per_core,
  1297. &opt.cpu_bind_type)) {
  1298. exit(error_exit);
  1299. }
  1300. break;
  1301. case LONG_OPT_BLRTS_IMAGE:
  1302. xfree(opt.blrtsimage);
  1303. opt.blrtsimage = xstrdup(optarg);
  1304. break;
  1305. case LONG_OPT_LINUX_IMAGE:
  1306. xfree(opt.linuximage);
  1307. opt.linuximage = xstrdup(optarg);
  1308. break;
  1309. case LONG_OPT_MLOADER_IMAGE:
  1310. xfree(opt.mloaderimage);
  1311. opt.mloaderimage = xstrdup(optarg);
  1312. break;
  1313. case LONG_OPT_RAMDISK_IMAGE:
  1314. xfree(opt.ramdiskimage);
  1315. opt.ramdiskimage = xstrdup(optarg);
  1316. break;
  1317. case LONG_OPT_REBOOT:
  1318. opt.reboot = true;
  1319. break;
  1320. case LONG_OPT_GET_USER_ENV:
  1321. error("--get-user-env is no longer supported in srun, "
  1322. "use sbatch");
  1323. break;
  1324. case LONG_OPT_PTY:
  1325. #ifdef HAVE_PTY_H
  1326. opt.pty = true;
  1327. opt.unbuffered = true; /* implicit */
  1328. if (opt.ifname)
  1329. tmp_str = "--input";
  1330. else if (opt.ofname)
  1331. tmp_str = "--output";
  1332. else if (opt.efname)
  1333. tmp_str = "--error";
  1334. else
  1335. tmp_str = NULL;
  1336. if (tmp_str) {
  1337. error("%s incompatible with --pty option",
  1338. tmp_str);
  1339. exit(error_exit);
  1340. }
  1341. #else
  1342. error("--pty not currently supported on this system "
  1343. "type");
  1344. #endif
  1345. break;
  1346. case LONG_OPT_CHECKPOINT:
  1347. xfree(opt.ckpt_interval_str);
  1348. opt.ckpt_interval_str = xstrdup(optarg);
  1349. break;
  1350. case LONG_OPT_OPEN_MODE:
  1351. if ((optarg[0] == 'a') || (optarg[0] == 'A'))
  1352. opt.open_mode = OPEN_MODE_APPEND;
  1353. else if ((optarg[0] == 't') || (optarg[0] == 'T'))
  1354. opt.open_mode = OPEN_MODE_TRUNCATE;
  1355. else {
  1356. error("Invalid --open-mode argument: %s. Ignored",
  1357. optarg);
  1358. }
  1359. break;
  1360. case LONG_OPT_ACCTG_FREQ:
  1361. opt.acctg_freq = _get_int(optarg, "acctg-freq",
  1362. false);
  1363. break;
  1364. case LONG_OPT_CPU_FREQ:
  1365. if (cpu_freq_verify_param(optarg, &opt.cpu_freq))
  1366. error("Invalid --cpu-freq argument: %s. Ignored",
  1367. optarg);
  1368. break;
  1369. case LONG_OPT_WCKEY:
  1370. xfree(opt.wckey);
  1371. opt.wckey = xstrdup(optarg);
  1372. break;
  1373. case LONG_OPT_RESERVATION:
  1374. xfree(opt.reservation);
  1375. opt.reservation = xstrdup(optarg);
  1376. break;
  1377. case LONG_OPT_LAUNCHER_OPTS:
  1378. xfree(opt.launcher_opts);
  1379. opt.launcher_opts = xstrdup(optarg);
  1380. break;
  1381. case LONG_OPT_CHECKPOINT_DIR:
  1382. xfree(opt.ckpt_dir);
  1383. opt.ckpt_dir = xstrdup(optarg);
  1384. break;
  1385. case LONG_OPT_RESTART_DIR:
  1386. xfree(opt.restart_dir);
  1387. opt.restart_dir = xstrdup(optarg);
  1388. break;
  1389. case LONG_OPT_SIGNAL:
  1390. if (get_signal_opts(optarg, &opt.warn_signal,
  1391. &opt.warn_time)) {
  1392. error("Invalid signal specification: %s",
  1393. optarg);
  1394. exit(error_exit);
  1395. }
  1396. break;
  1397. case LONG_OPT_TIME_MIN:
  1398. xfree(opt.time_min_str);
  1399. opt.time_min_str = xstrdup(optarg);
  1400. break;
  1401. case LONG_OPT_GRES:
  1402. if (!strcasecmp(optarg, "help") ||
  1403. !strcasecmp(optarg, "list")) {
  1404. print_gres_help();
  1405. exit(0);
  1406. }
  1407. xfree(opt.gres);
  1408. opt.gres = xstrdup(optarg);
  1409. break;
  1410. case LONG_OPT_ALPS:
  1411. verbose("Not running ALPS. --alps option ignored.");
  1412. break;
  1413. case LONG_OPT_REQ_SWITCH:
  1414. pos_delimit = strstr(optarg,"@");
  1415. if (pos_delimit != NULL) {
  1416. pos_delimit[0] = '\0';
  1417. pos_delimit++;
  1418. opt.wait4switch = time_str2secs(pos_delimit);
  1419. }
  1420. opt.req_switch = _get_int(optarg, "switches",
  1421. true);
  1422. break;
  1423. default:
  1424. if (spank_process_option (opt_char, optarg) < 0) {
  1425. exit(error_exit);
  1426. }
  1427. }
  1428. }
  1429. spank_option_table_destroy (optz);
  1430. }
  1431. #if defined HAVE_BG && !defined HAVE_BG_L_P
  1432. static bool _check_is_pow_of_2(int32_t n) {
  1433. /* Bitwise ANDing a power of 2 number like 16 with its
  1434. * negative (-16) gives itself back. Only integers which are power of
  1435. * 2 behave like that.
  1436. */
  1437. return ((n!=0) && (n&(-n))==n);
  1438. }
  1439. extern void bg_figure_nodes_tasks()
  1440. {
  1441. /* A bit of setup for IBM's runjob. runjob only has so many
  1442. options, so it isn't that bad.
  1443. */
  1444. int32_t node_cnt;
  1445. if (opt.max_nodes)
  1446. node_cnt = opt.max_nodes;
  1447. else
  1448. node_cnt = opt.min_nodes;
  1449. if (!opt.ntasks_set) {
  1450. if (opt.ntasks_per_node != NO_VAL)
  1451. opt.ntasks = node_cnt * opt.ntasks_per_node;
  1452. else {
  1453. opt.ntasks = node_cnt;
  1454. opt.ntasks_per_node = 1;
  1455. }
  1456. opt.ntasks_set = true;
  1457. } else {
  1458. int32_t ntpn;
  1459. bool figured = false;
  1460. if (opt.nodes_set) {
  1461. if (node_cnt > opt.ntasks) {
  1462. if (opt.nodes_set_opt)
  1463. info("You asked for %d nodes, "
  1464. "but only %d tasks, resetting "
  1465. "node count to %u.",
  1466. node_cnt, opt.ntasks, opt.ntasks);
  1467. opt.max_nodes = opt.min_nodes = node_cnt
  1468. = opt.ntasks;
  1469. }
  1470. }
  1471. /* If nodes not set do not try to set min/max nodes
  1472. yet since that would result in an incorrect
  1473. allocation. For a step allocation it is figured
  1474. out later in srun_job.c _job_create_structure().
  1475. */
  1476. if (!opt.ntasks_per_node || (opt.ntasks_per_node == NO_VAL)) {
  1477. /* We always want the next larger number if
  1478. there is a fraction so we try to stay in
  1479. the allocation requested.
  1480. */
  1481. opt.ntasks_per_node =
  1482. (opt.ntasks + node_cnt - 1) / node_cnt;
  1483. figured = true;
  1484. }
  1485. /* On a Q we need ntasks_per_node to be a multiple of 2 */
  1486. ntpn = opt.ntasks_per_node;
  1487. while (!_check_is_pow_of_2(ntpn))
  1488. ntpn++;
  1489. if (!figured && (ntpn != opt.ntasks_per_node)) {
  1490. info("You requested --ntasks-per-node=%d, which is not "
  1491. "a power of 2. Setting --ntasks-per-node=%d "
  1492. "for you.", opt.ntasks_per_node, ntpn);
  1493. figured = true;
  1494. }
  1495. opt.ntasks_per_node = ntpn;
  1496. ntpn = opt.ntasks / opt.ntasks_per_node;
  1497. /* Make sure we are requesting the correct number of nodes. */
  1498. if (node_cnt < ntpn) {
  1499. opt.max_nodes = opt.min_nodes = ntpn;
  1500. if (opt.nodes_set && !figured) {
  1501. fatal("You requested -N %d and -n %d "
  1502. "with --ntasks-per-node=%d. "
  1503. "This isn't a valid request.",
  1504. node_cnt, opt.ntasks,
  1505. opt.ntasks_per_node);
  1506. }
  1507. node_cnt = opt.max_nodes;
  1508. }
  1509. /* Do this again to make sure we have a legitimate
  1510. ratio. */
  1511. ntpn = opt.ntasks_per_node;
  1512. if ((node_cnt * ntpn) < opt.ntasks) {
  1513. ntpn++;
  1514. while (!_check_is_pow_of_2(ntpn))
  1515. ntpn++;
  1516. if (!figured && (ntpn != opt.ntasks_per_node))
  1517. info("You requested --ntasks-per-node=%d, "
  1518. "which cannot spread across %d nodes "
  1519. "correctly. Setting --ntasks-per-node=%d "
  1520. "for you.",
  1521. opt.ntasks_per_node, node_cnt, ntpn);
  1522. opt.ntasks_per_node = ntpn;
  1523. }
  1524. if (opt.nodes_set) {
  1525. if ((opt.ntasks_per_node != 1)
  1526. && (opt.ntasks_per_node != 2)
  1527. && (opt.ntasks_per_node != 4)
  1528. && (opt.ntasks_per_node != 8)
  1529. && (opt.ntasks_per_node != 16)
  1530. && (opt.ntasks_per_node != 32)
  1531. && (opt.ntasks_per_node != 64))
  1532. fatal("You requested -N %d and -n %d "
  1533. "which gives --ntasks-per-node=%d. "
  1534. "This isn't a valid request.",
  1535. node_cnt, opt.ntasks,
  1536. opt.ntasks_per_node);
  1537. else if (!opt.overcommit
  1538. && ((opt.ntasks_per_node == 32)
  1539. || (opt.ntasks_per_node == 64)))
  1540. fatal("You requested -N %d and -n %d "
  1541. "which gives --ntasks-per-node=%d. "
  1542. "This isn't a valid request "
  1543. "without --overcommit.",
  1544. node_cnt, opt.ntasks,
  1545. opt.ntasks_per_node);
  1546. }
  1547. }
  1548. }
  1549. #endif
  1550. /*
  1551. * _opt_args() : set options via commandline args and popt
  1552. */
  1553. static void _opt_args(int argc, char **argv)
  1554. {
  1555. int i, command_pos = 0, command_args = 0;
  1556. char **rest = NULL;
  1557. set_options(argc, argv);
  1558. if ((opt.pn_min_memory > -1) && (opt.mem_per_cpu > -1)) {
  1559. if (opt.pn_min_memory < opt.mem_per_cpu) {
  1560. info("mem < mem-per-cpu - resizing mem to be equal "
  1561. "to mem-per-cpu");
  1562. opt.pn_min_memory = opt.mem_per_cpu;
  1563. }
  1564. }
  1565. /* Check to see if user has specified enough resources to
  1566. * satisfy the plane distribution with the specified
  1567. * plane_size.
  1568. * if (n/plane_size < N) and ((N-1) * plane_size >= n) -->
  1569. * problem Simple check will not catch all the problem/invalid
  1570. * cases.
  1571. * The limitations of the plane distribution in the cons_res
  1572. * environment are more extensive and are documented in the
  1573. * SLURM reference guide. */
  1574. if (opt.distribution == SLURM_DIST_PLANE && opt.plane_size) {
  1575. if ((opt.ntasks/opt.plane_size) < opt.min_nodes) {
  1576. if (((opt.min_nodes-1)*opt.plane_size) >= opt.ntasks) {
  1577. #if(0)
  1578. info("Too few processes ((n/plane_size) %d < N %d) "
  1579. "and ((N-1)*(plane_size) %d >= n %d)) ",
  1580. opt.ntasks/opt.plane_size, opt.min_nodes,
  1581. (opt.min_nodes-1)*opt.plane_size, opt.ntasks);
  1582. #endif
  1583. error("Too few processes for the requested "
  1584. "{plane,node} distribution");
  1585. exit(error_exit);
  1586. }
  1587. }
  1588. }
  1589. #ifdef HAVE_AIX
  1590. if (opt.network == NULL) {
  1591. opt.network = "us,sn_all,bulk_xfer";
  1592. setenv("SLURM_NETWORK", opt.network, 1);
  1593. }
  1594. #endif
  1595. if (opt.dependency)
  1596. setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency);
  1597. if (opt.nodelist && (!opt.test_only)) {
  1598. #ifdef HAVE_BG
  1599. info("\tThe nodelist option should only be used if\n"
  1600. "\tthe block you are asking for can be created.\n"
  1601. "\tIt should also include all the midplanes you\n"
  1602. "\twant to use, partial lists will not work correctly.\n"
  1603. "\tPlease consult smap before using this option\n"
  1604. "\tor your job may be stuck with no way to run.");
  1605. #endif
  1606. }
  1607. opt.argc = 0;
  1608. if (optind < argc) {
  1609. rest = argv + optind;
  1610. while (rest[opt.argc] != NULL)
  1611. opt.argc++;
  1612. }
  1613. command_args = opt.argc;
  1614. if (!rest)
  1615. fatal("No command given to execute.");
  1616. #if defined HAVE_BG && !defined HAVE_BG_L_P
  1617. /* Since this is needed on an emulated system don't put this code in
  1618. * the launch plugin.
  1619. */
  1620. bg_figure_nodes_tasks();
  1621. #endif
  1622. command_pos = launch_g_setup_srun_opt(rest);
  1623. /* Since this is needed on an emulated system don't put this code in
  1624. * the launch plugin.
  1625. */
  1626. #if defined HAVE_BG && !defined HAVE_BG_L_P
  1627. if (opt.test_only && !opt.jobid_set && (opt.jobid != NO_VAL)) {
  1628. /* Do not perform allocate test, only disable use of "runjob" */
  1629. opt.test_only = false;
  1630. }
  1631. #endif
  1632. /* make sure we have allocated things correctly */
  1633. xassert((command_pos + command_args) <= opt.argc);
  1634. for (i = command_pos; i < opt.argc; i++) {
  1635. if (!rest[i-command_pos])
  1636. break;
  1637. opt.argv[i] = xstrdup(rest[i-command_pos]);
  1638. }
  1639. opt.argv[i] = NULL; /* End of argv's (for possible execv) */
  1640. if (!launch_g_handle_multi_prog_verify(command_pos)
  1641. && (opt.argc > command_pos)) {
  1642. char *fullpath;
  1643. if ((fullpath = search_path(opt.cwd,
  1644. opt.argv[command_pos],
  1645. false, X_OK))) {
  1646. xfree(opt.argv[command_pos]);
  1647. opt.argv[command_pos] = fullpath;
  1648. }
  1649. }
  1650. /* for (i=0; i<opt.argc; i++) */
  1651. /* info("%d is '%s'", i, opt.argv[i]); */
  1652. }
  1653. /*
  1654. * _opt_verify : perform some post option processing verification
  1655. *
  1656. */
  1657. static bool _opt_verify(void)
  1658. {
  1659. bool verified = true;
  1660. hostlist_t hl = NULL;
  1661. int hl_cnt = 0;
  1662. /*
  1663. * Do not set slurmd debug level higher than DEBUG2,
  1664. * as DEBUG3 is used for slurmd IO operations, which
  1665. * are not appropriate to be sent back to srun. (because
  1666. * these debug messages cause the generation of more
  1667. * debug messages ad infinitum)
  1668. */
  1669. if (opt.slurmd_debug + LOG_LEVEL_ERROR > LOG_LEVEL_DEBUG2) {
  1670. opt.slurmd_debug = LOG_LEVEL_DEBUG2 - LOG_LEVEL_ERROR;
  1671. info("Using srun's max debug increment of %d",
  1672. opt.slurmd_debug);
  1673. }
  1674. if (opt.quiet && _verbose) {
  1675. error ("don't specify both --verbose (-v) and --quiet (-Q)");
  1676. verified = false;
  1677. }
  1678. if (opt.no_alloc && !opt.nodelist) {
  1679. error("must specify a node list with -Z, --no-allocate.");
  1680. verified = false;
  1681. }
  1682. if (opt.no_alloc && opt.exc_nodes) {
  1683. error("can not specify --exclude list with -Z, --no-allocate.");
  1684. verified = false;
  1685. }
  1686. if (opt.no_alloc && opt.relative_set) {
  1687. error("do not specify -r,--relative with -Z,--no-allocate.");
  1688. verified = false;
  1689. }
  1690. if (opt.relative_set && (opt.exc_nodes || opt.nodelist)) {
  1691. error("-r,--relative not allowed with "
  1692. "-w,--nodelist or -x,--exclude.");
  1693. verified = false;
  1694. }
  1695. if (opt.cpus_set && (opt.pn_min_cpus < opt.cpus_per_task))
  1696. opt.pn_min_cpus = opt.cpus_per_task;
  1697. if (opt.argc > 0)
  1698. opt.cmd_name = base_name(opt.argv[0]);
  1699. if (!opt.nodelist) {
  1700. if((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) {
  1701. /* make sure the file being read in has a / in
  1702. it to make sure it is a file in the
  1703. valid_node_list function */
  1704. if(!strstr(opt.nodelist, "/")) {
  1705. char *add_slash = xstrdup("./");
  1706. xstrcat(add_slash, opt.nodelist);
  1707. xfree(opt.nodelist);
  1708. opt.nodelist = add_slash;
  1709. }
  1710. opt.distribution = SLURM_DIST_ARBITRARY;
  1711. opt.hostfile = xstrdup(opt.nodelist);
  1712. if (!_valid_node_list(&opt.nodelist)) {
  1713. error("Failure getting NodeNames from "
  1714. "hostfile");
  1715. exit(error_exit);
  1716. } else {
  1717. debug("loaded nodes (%s) from hostfile",
  1718. opt.nodelist);
  1719. }
  1720. }
  1721. } else {
  1722. if(strstr(opt.nodelist, "/"))
  1723. opt.hostfile = xstrdup(opt.nodelist);
  1724. if (!_valid_node_list(&opt.nodelist))
  1725. exit(error_exit);
  1726. }
  1727. /* set up the proc and node counts based on the arbitrary list
  1728. of nodes */
  1729. if((opt.distribution == SLURM_DIST_ARBITRARY)
  1730. && (!opt.nodes_set || !opt.ntasks_set)) {
  1731. hostlist_t hl = hostlist_create(opt.nodelist);
  1732. if(!opt.ntasks_set) {
  1733. opt.ntasks_set = true;
  1734. opt.ntasks = hostlist_count(hl);
  1735. }
  1736. if(!opt.nodes_set) {
  1737. opt.nodes_set = true;
  1738. opt.nodes_set_opt = true;
  1739. hostlist_uniq(hl);
  1740. opt.min_nodes = opt.max_nodes = hostlist_count(hl);
  1741. }
  1742. hostlist_destroy(hl);
  1743. }
  1744. /* now if max is set make sure we have <= max_nodes in the
  1745. * nodelist but only if it isn't arbitrary since the user has
  1746. * laid it out how it should be so don't mess with it print an
  1747. * error later if it doesn't work the way they wanted */
  1748. if(opt.max_nodes && opt.nodelist
  1749. && opt.distribution != SLURM_DIST_ARBITRARY) {
  1750. hostlist_t hl = hostlist_create(opt.nodelist);
  1751. int count = hostlist_count(hl);
  1752. if(count > opt.max_nodes) {
  1753. int i = 0;
  1754. error("Required nodelist includes more nodes than "
  1755. "permitted by max-node count (%d > %d). "
  1756. "Eliminating nodes from the nodelist.",
  1757. count, opt.max_nodes);
  1758. count -= opt.max_nodes;
  1759. while(i<count) {
  1760. char *name = hostlist_pop(hl);
  1761. if(name)
  1762. free(name);
  1763. else
  1764. break;
  1765. i++;
  1766. }
  1767. xfree(opt.nodelist);
  1768. opt.nodelist = hostlist_ranged_string_xmalloc(hl);
  1769. }
  1770. hostlist_destroy(hl);
  1771. }
  1772. if ((opt.argc == 0) && (opt.test_only == false)) {
  1773. error("must supply remote command");
  1774. verified = false;
  1775. }
  1776. /* check for realistic arguments */
  1777. if (opt.ntasks <= 0) {
  1778. error("invalid number of tasks (-n %d)", opt.ntasks);
  1779. verified = false;
  1780. }
  1781. if (opt.cpus_set && (opt.cpus_per_task <= 0)) {
  1782. error("invalid number of cpus per task (-c %d)",
  1783. opt.cpus_per_task);
  1784. verified = false;
  1785. }
  1786. if ((opt.min_nodes <= 0) || (opt.max_nodes < 0) ||
  1787. (opt.max_nodes && (opt.min_nodes > opt.max_nodes))) {
  1788. error("invalid number of nodes (-N %d-%d)",
  1789. opt.min_nodes, opt.max_nodes);
  1790. verified = false;
  1791. }
  1792. #if defined(HAVE_BGL)
  1793. if (opt.blrtsimage && strchr(opt.blrtsimage, ' ')) {
  1794. error("invalid BlrtsImage given '%s'", opt.blrtsimage);
  1795. verified = false;
  1796. }
  1797. #endif
  1798. if (opt.linuximage && strchr(opt.linuximage, ' ')) {
  1799. #ifdef HAVE_BGL
  1800. error("invalid LinuxImage given '%s'", opt.linuximage);
  1801. #else
  1802. error("invalid CnloadImage given '%s'", opt.linuximage);
  1803. #endif
  1804. verified = false;
  1805. }
  1806. if (opt.mloaderimage && strchr(opt.mloaderimage, ' ')) {
  1807. error("invalid MloaderImage given '%s'", opt.mloaderimage);
  1808. verified = false;
  1809. }
  1810. if (opt.ramdiskimage && strchr(opt.ramdiskimage, ' ')) {
  1811. #ifdef HAVE_BGL
  1812. error("invalid RamDiskImage given '%s'", opt.ramdiskimage);
  1813. #else
  1814. error("invalid IoloadImage given '%s'", opt.ramdiskimage);
  1815. #endif
  1816. verified = false;
  1817. }
  1818. /* bound max_threads/cores from ntasks_cores/sockets */
  1819. if (opt.ntasks_per_core > 0) {
  1820. /* if cpu_bind_type doesn't already have a auto pref,
  1821. * choose the level based on the level of ntasks
  1822. */
  1823. if (!(opt.cpu_bind_type & (CPU_BIND_TO_SOCKETS |
  1824. CPU_BIND_TO_CORES |
  1825. CPU_BIND_TO_THREADS |
  1826. CPU_BIND_TO_LDOMS))) {
  1827. opt.cpu_bind_type |= CPU_BIND_TO_CORES;
  1828. }
  1829. }
  1830. if (opt.ntasks_per_socket > 0) {
  1831. /* if cpu_bind_type doesn't already have a auto pref,
  1832. * choose the level based on the level of ntasks
  1833. */
  1834. if (!(opt.cpu_bind_type & (CPU_BIND_TO_SOCKETS |
  1835. CPU_BIND_TO_CORES |
  1836. CPU_BIND_TO_THREADS |
  1837. CPU_BIND_TO_LDOMS))) {
  1838. opt.cpu_bind_type |= CPU_BIND_TO_SOCKETS;
  1839. }
  1840. }
  1841. /* massage the numbers */
  1842. if (opt.nodelist) {
  1843. hl = hostlist_create(opt.nodelist);
  1844. if (!hl) {
  1845. error("memory allocation failure");
  1846. exit(error_exit);
  1847. }
  1848. hostlist_uniq(hl);
  1849. hl_cnt = hostlist_count(hl);
  1850. if (opt.nodes_set)
  1851. opt.min_nodes = MAX(hl_cnt, opt.min_nodes);
  1852. else
  1853. opt.min_nodes = hl_cnt;
  1854. }
  1855. if ((opt.nodes_set || opt.extra_set) &&
  1856. ((opt.min_nodes == opt.max_nodes) || (opt.max_nodes == 0)) &&
  1857. !opt.ntasks_set) {
  1858. /* 1 proc / node default */
  1859. opt.ntasks = opt.min_nodes;
  1860. /* 1 proc / min_[socket * core * thread] default */
  1861. if ((opt.sockets_per_node != NO_VAL) &&
  1862. (opt.cores_per_socket != NO_VAL) &&
  1863. (opt.threads_per_core != NO_VAL)) {
  1864. opt.ntasks *= opt.sockets_per_node;
  1865. opt.ntasks *= opt.cores_per_socket;
  1866. opt.ntasks *= opt.threads_per_core;
  1867. opt.ntasks_set = true;
  1868. }
  1869. /* massage the numbers */
  1870. if (opt.nodelist) {
  1871. if (hl) /* possibly built above */
  1872. hostlist_destroy(hl);
  1873. hl = hostlist_create(opt.nodelist);
  1874. if (!hl) {
  1875. error("memory allocation failure");
  1876. exit(error_exit);
  1877. }
  1878. if(opt.distribution == SLURM_DIST_ARBITRARY
  1879. && !opt.ntasks_set) {
  1880. opt.ntasks = hostlist_count(hl);
  1881. opt.ntasks_set = true;
  1882. }
  1883. hostlist_uniq(hl);
  1884. hl_cnt = hostlist_count(hl);
  1885. if (opt.nodes_set)
  1886. opt.min_nodes = MAX(hl_cnt, opt.min_nodes);
  1887. else
  1888. opt.min_nodes = hl_cnt;
  1889. /* Don't destroy hl here since it may be used later */
  1890. }
  1891. } else if (opt.nodes_set && opt.ntasks_set) {
  1892. /*
  1893. * Make sure that the number of
  1894. * max_nodes is <= number of tasks
  1895. */
  1896. if (opt.ntasks < opt.max_nodes)
  1897. opt.max_nodes = opt.ntasks;
  1898. /*
  1899. * make sure # of procs >= min_nodes
  1900. */
  1901. if ((opt.ntasks < opt.min_nodes) && (opt.ntasks > 0)) {
  1902. info ("Warning: can't run %d processes on %d "
  1903. "nodes, setting nnodes to %d",
  1904. opt.ntasks, opt.min_nodes, opt.ntasks);
  1905. opt.min_nodes = opt.ntasks;
  1906. opt.nodes_set_opt = true;
  1907. if (opt.max_nodes
  1908. && (opt.min_nodes > opt.max_nodes) )
  1909. opt.max_nodes = opt.min_nodes;
  1910. if (hl_cnt > opt.min_nodes) {
  1911. int del_cnt, i;
  1912. char *host;
  1913. del_cnt = hl_cnt - opt.min_nodes;
  1914. for (i=0; i<del_cnt; i++) {
  1915. host = hostlist_pop(hl);
  1916. free(host);
  1917. }
  1918. xfree(opt.nodelist);
  1919. opt.nodelist =
  1920. hostlist_ranged_string_xmalloc(hl);
  1921. }
  1922. }
  1923. } /* else if (opt.ntasks_set && !opt.nodes_set) */
  1924. if (hl)
  1925. hostlist_destroy(hl);
  1926. if (opt.max_threads <= 0) { /* set default */
  1927. error("Thread value invalid, reset to 1");
  1928. opt.max_threads = 1;
  1929. pmi_server_max_threads(opt.max_threads);
  1930. } else if (opt.max_threads > MAX_THREADS) {
  1931. error("Thread value exceeds defined limit, reset to %d",
  1932. MAX_THREADS);
  1933. }
  1934. if (opt.labelio && opt.unbuffered) {
  1935. error("Do not specify both -l (--label) and "
  1936. "-u (--unbuffered)");
  1937. exit(error_exit);
  1938. }
  1939. /*
  1940. * --wait always overrides hidden max_exit_timeout
  1941. */
  1942. if (opt.max_wait)
  1943. opt.max_exit_timeout = opt.max_wait;
  1944. if (opt.time_limit_str) {
  1945. opt.time_limit = time_str2mins(opt.time_limit_str);
  1946. if ((opt.time_limit < 0) && (opt.time_limit != INFINITE)) {
  1947. error("Invalid time limit specification");
  1948. exit(error_exit);
  1949. }
  1950. if (opt.time_limit == 0)
  1951. opt.time_limit = INFINITE;
  1952. }
  1953. if (opt.time_min_str) {
  1954. opt.time_min = time_str2mins(opt.time_min_str);
  1955. if ((opt.time_min < 0) && (opt.time_min != INFINITE)) {
  1956. error("Invalid time-min specification");
  1957. exit(error_exit);
  1958. }
  1959. if (opt.time_min == 0)
  1960. opt.time_min = INFINITE;
  1961. }
  1962. if (opt.ckpt_interval_str) {
  1963. opt.ckpt_interval = time_str2mins(opt.ckpt_interval_str);
  1964. if ((opt.ckpt_interval < 0) &&
  1965. (opt.ckpt_interval != INFINITE)) {
  1966. error("Invalid checkpoint interval specification");
  1967. exit(error_exit);
  1968. }
  1969. }
  1970. if (! opt.ckpt_dir)
  1971. opt.ckpt_dir = xstrdup(opt.cwd);
  1972. if ((opt.euid != (uid_t) -1) && (opt.euid != opt.uid))
  1973. opt.uid = opt.euid;
  1974. if ((opt.egid != (gid_t) -1) && (opt.egid != opt.gid))
  1975. opt.gid = opt.egid;
  1976. if (slurm_verify_cpu_bind(NULL, &opt.cpu_bind,
  1977. &opt.cpu_bind_type))
  1978. exit(error_exit);
  1979. if (!mpi_initialized) {
  1980. mpi_type = slurm_get_mpi_default();
  1981. (void) mpi_hook_client_init(NULL);
  1982. }
  1983. if ((opt.resv_port_cnt == NO_VAL) && !strcmp(mpi_type, "openmpi"))
  1984. opt.resv_port_cnt = 0;
  1985. xfree(mpi_type);
  1986. return verified;
  1987. }
  1988. /* Initialize the spank_job_env based upon environment variables set
  1989. * via salloc or sbatch commands */
  1990. extern void init_spank_env(void)
  1991. {
  1992. int i;
  1993. char *name, *eq, *value;
  1994. if (environ == NULL)
  1995. return;
  1996. for (i=0; environ[i]; i++) {
  1997. if (strncmp(environ[i], "SLURM_SPANK_", 12))
  1998. continue;
  1999. name = xstrdup(environ[i] + 12);
  2000. eq = strchr(name, (int)'=');
  2001. if (eq == NULL) {
  2002. xfree(name);
  2003. break;
  2004. }
  2005. eq[0] = '\0';
  2006. value = eq + 1;
  2007. spank_set_job_env(name, value, 1);
  2008. xfree(name);
  2009. }
  2010. }
  2011. /* Functions used by SPANK plugins to read and write job environment
  2012. * variables for use within job's Prolog and/or Epilog */
  2013. extern char *spank_get_job_env(const char *name)
  2014. {
  2015. int i, len;
  2016. char *tmp_str = NULL;
  2017. if ((name == NULL) || (name[0] == '\0') ||
  2018. (strchr(name, (int)'=') != NULL)) {
  2019. slurm_seterrno(EINVAL);
  2020. return NULL;
  2021. }
  2022. xstrcat(tmp_str, name);
  2023. xstrcat(tmp_str, "=");
  2024. len = strlen(tmp_str);
  2025. for (i=0; i<opt.spank_job_env_size; i++) {
  2026. if (strncmp(opt.spank_job_env[i], tmp_str, len))
  2027. continue;
  2028. xfree(tmp_str);
  2029. return (opt.spank_job_env[i] + len);
  2030. }
  2031. return NULL;
  2032. }
  2033. extern int spank_set_job_env(const char *name, const char *value,
  2034. int overwrite)
  2035. {
  2036. int i, len;
  2037. char *tmp_str = NULL;
  2038. if ((name == NULL) || (name[0] == '\0') ||
  2039. (strchr(name, (int)'=') != NULL)) {
  2040. slurm_seterrno(EINVAL);
  2041. return -1;
  2042. }
  2043. xstrcat(tmp_str, name);
  2044. xstrcat(tmp_str, "=");
  2045. len = strlen(tmp_str);
  2046. xstrcat(tmp_str, value);
  2047. for (i=0; i<opt.spank_job_env_size; i++) {
  2048. if (strncmp(opt.spank_job_env[i], tmp_str, len))
  2049. continue;
  2050. if (overwrite) {
  2051. xfree(opt.spank_job_env[i]);
  2052. opt.spank_job_env[i] = tmp_str;
  2053. } else
  2054. xfree(tmp_str);
  2055. return 0;
  2056. }
  2057. /* Need to add an entry */
  2058. opt.spank_job_env_size++;
  2059. xrealloc(opt.spank_job_env, sizeof(char *) * opt.spank_job_env_size);
  2060. opt.spank_job_env[i] = tmp_str;
  2061. return 0;
  2062. }
  2063. extern int spank_unset_job_env(const char *name)
  2064. {
  2065. int i, j, len;
  2066. char *tmp_str = NULL;
  2067. if ((name == NULL) || (name[0] == '\0') ||
  2068. (strchr(name, (int)'=') != NULL)) {
  2069. slurm_seterrno(EINVAL);
  2070. return -1;
  2071. }
  2072. xstrcat(tmp_str, name);
  2073. xstrcat(tmp_str, "=");
  2074. len = strlen(tmp_str);
  2075. for (i=0; i<opt.spank_job_env_size; i++) {
  2076. if (strncmp(opt.spank_job_env[i], tmp_str, len))
  2077. continue;
  2078. xfree(opt.spank_job_env[i]);
  2079. for (j=(i+1); j<opt.spank_job_env_size; i++, j++)
  2080. opt.spank_job_env[i] = opt.spank_job_env[j];
  2081. opt.spank_job_env_size--;
  2082. if (opt.spank_job_env_size == 0)
  2083. xfree(opt.spank_job_env);
  2084. return 0;
  2085. }
  2086. return 0; /* not found */
  2087. }
  2088. /* helper function for printing options
  2089. *
  2090. * warning: returns pointer to memory allocated on the stack.
  2091. */
  2092. static char *print_constraints()
  2093. {
  2094. char *buf = xstrdup("");
  2095. if (opt.pn_min_cpus > 0)
  2096. xstrfmtcat(buf, "mincpus-per-node=%d ", opt.pn_min_cpus);
  2097. if (opt.pn_min_memory > 0)
  2098. xstrfmtcat(buf, "mem-per-node=%dM ", opt.pn_min_memory);
  2099. if (opt.mem_per_cpu > 0)
  2100. xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu);
  2101. if (opt.pn_min_tmp_disk > 0)
  2102. xstrfmtcat(buf, "tmp-per-node=%ld ", opt.pn_min_tmp_disk);
  2103. if (opt.contiguous == true)
  2104. xstrcat(buf, "contiguous ");
  2105. if (opt.nodelist != NULL)
  2106. xstrfmtcat(buf, "nodelist=%s ", opt.nodelist);
  2107. if (opt.exc_nodes != NULL)
  2108. xstrfmtcat(buf, "exclude=%s ", opt.exc_nodes);
  2109. if (opt.constraints != NULL)
  2110. xstrfmtcat(buf, "constraints=`%s' ", opt.constraints);
  2111. return buf;
  2112. }
  /* Map a boolean to the string "true" or "false". Only a value equal to
   * `true` yields "true". The argument and the whole expansion are
   * parenthesized so the macro is safe inside larger expressions. */
  #define tf_(b) (((b) == true) ? "true" : "false")
  2114. static void _opt_list(void)
  2115. {
  2116. int i;
  2117. char *str;
  2118. info("defined options for program `%s'", opt.progname);
  2119. info("--------------- ---------------------");
  2120. info("user : `%s'", opt.user);
  2121. info("uid : %ld", (long) opt.uid);
  2122. info("gid : %ld", (long) opt.gid);
  2123. info("cwd : %s", opt.cwd);
  2124. info("ntasks : %d %s", opt.ntasks,
  2125. opt.ntasks_set ? "(set)" : "(default)");
  2126. if (opt.cpus_set)
  2127. info("cpus_per_task : %d", opt.cpus_per_task);
  2128. if (opt.max_nodes)
  2129. info("nodes : %d-%d", opt.min_nodes, opt.max_nodes);
  2130. else {
  2131. info("nodes : %d %s", opt.min_nodes,
  2132. opt.nodes_set ? "(set)" : "(default)");
  2133. }
  2134. info("jobid : %u %s", opt.jobid,
  2135. opt.jobid_set ? "(set)" : "(default)");
  2136. info("partition : %s",
  2137. opt.partition == NULL ? "default" : opt.partition);
  2138. info("job name : `%s'", opt.job_name);
  2139. info("reservation : `%s'", opt.reservation);
  2140. info("wckey : `%s'", opt.wckey);
  2141. info("switches : %d", opt.req_switch);
  2142. info("wait-for-switches : %d", opt.wait4switch);
  2143. info("distribution : %s", format_task_dist_states(opt.distribution));
  2144. if(opt.distribution == SLURM_DIST_PLANE)
  2145. info("plane size : %u", opt.plane_size);
  2146. info("cpu_bind : %s",
  2147. opt.cpu_bind == NULL ? "default" : opt.cpu_bind);
  2148. info("mem_bind : %s",
  2149. opt.mem_bind == NULL ? "default" : opt.mem_bind);
  2150. info("cpu_freq : %u", opt.cpu_freq);
  2151. info("verbose : %d", _verbose);
  2152. info("slurmd_debug : %d", opt.slurmd_debug);
  2153. if (opt.immediate <= 1)
  2154. info("immediate : %s", tf_(opt.immediate));
  2155. else
  2156. info("immediate : %d secs", (opt.immediate - 1));
  2157. info("label output : %s", tf_(opt.labelio));
  2158. info("unbuffered IO : %s", tf_(opt.unbuffered));
  2159. info("overcommit : %s", tf_(opt.overcommit));
  2160. info("threads : %d", opt.max_threads);
  2161. if (opt.time_limit == INFINITE)
  2162. info("time_limit : INFINITE");
  2163. else if (opt.time_limit != NO_VAL)
  2164. info("time_limit : %d", opt.time_limit);
  2165. if (opt.time_min != NO_VAL)
  2166. info("time_min : %d", opt.time_min);
  2167. if (opt.ckpt_interval)
  2168. info("checkpoint : %d mins", opt.ckpt_interval);
  2169. info("checkpoint_dir : %s", opt.ckpt_dir);
  2170. if (opt.restart_dir)
  2171. info("restart_dir : %s", opt.restart_dir);
  2172. info("wait : %d", opt.max_wait);
  2173. if (opt.nice)
  2174. info("nice : %d", opt.nice);
  2175. info("account : %s", opt.account);
  2176. info("comment : %s", opt.comment);
  2177. info("dependency : %s", opt.dependency);
  2178. if (opt.gres)
  2179. info("gres : %s", opt.gres);
  2180. info("exclusive : %s", tf_(opt.exclusive));
  2181. info("qos : %s", opt.qos);
  2182. if (opt.shared != (uint16_t) NO_VAL)
  2183. info("shared : %u", opt.shared);
  2184. str = print_constraints();
  2185. info("constraints : %s", str);
  2186. xfree(str);
  2187. for (i = 0; i < HIGHEST_DIMENSIONS; i++) {
  2188. if (opt.conn_type[i] == (uint16_t) NO_VAL)
  2189. break;
  2190. info("conn_type[%d] : %u", i, opt.conn_type[i]);
  2191. }
  2192. str = print_geometry(opt.geometry);
  2193. info("geometry : %s", str);
  2194. xfree(str);
  2195. info("reboot : %s", opt.reboot ? "no" : "yes");
  2196. info("rotate : %s", opt.no_rotate ? "yes" : "no");
  2197. info("preserve_env : %s", tf_(opt.preserve_env));
  2198. #ifdef HAVE_BGL
  2199. if (opt.blrtsimage)
  2200. info("BlrtsImage : %s", opt.blrtsimage);
  2201. #endif
  2202. if (opt.linuximage)
  2203. #ifdef HAVE_BGL
  2204. info("LinuxImage : %s", opt.linuximage);
  2205. #else
  2206. info("CnloadImage : %s", opt.linuximage);
  2207. #endif
  2208. if (opt.mloaderimage)
  2209. info("MloaderImage : %s", opt.mloaderimage);
  2210. if (opt.ramdiskimage)
  2211. #ifdef HAVE_BGL
  2212. info("RamDiskImage : %s", opt.ramdiskimage);
  2213. #else
  2214. info("IoloadImage : %s", opt.ramdiskimage);
  2215. #endif
  2216. info("network : %s", opt.network);
  2217. info("propagate : %s",
  2218. opt.propagate == NULL ? "NONE" : opt.propagate);
  2219. if (opt.begin) {
  2220. char time_str[32];
  2221. slurm_make_time_str(&opt.begin, time_str, sizeof(time_str));
  2222. info("begin : %s", time_str);
  2223. }
  2224. info("prolog : %s", opt.prolog);
  2225. info("epilog : %s", opt.epilog);
  2226. info("mail_type : %s", print_mail_type(opt.mail_type));
  2227. info("mail_user : %s", opt.mail_user);
  2228. info("task_prolog : %s", opt.task_prolog);
  2229. info("task_epilog : %s", opt.task_epilog);
  2230. info("multi_prog : %s", opt.multi_prog ? "yes" : "no");
  2231. info("sockets-per-node : %d", opt.sockets_per_node);
  2232. info("cores-per-socket : %d", opt.cores_per_socket);
  2233. info("threads-per-core : %d", opt.threads_per_core);
  2234. info("ntasks-per-node : %d", opt.ntasks_per_node);
  2235. info("ntasks-per-socket : %d", opt.ntasks_per_socket);
  2236. info("ntasks-per-core : %d", opt.ntasks_per_core);
  2237. info("plane_size : %u", opt.plane_size);
  2238. if (opt.resv_port_cnt != NO_VAL)
  2239. info("resv_port_cnt : %d", opt.resv_port_cnt);
  2240. str = print_commandline(opt.argc, opt.argv);
  2241. info("remote command : `%s'", str);
  2242. xfree(str);
  2243. }
  2244. /* Determine if srun is under the control of a parallel debugger or not */
  2245. static bool _under_parallel_debugger (void)
  2246. {
  2247. #if defined HAVE_BG_FILES && !defined HAVE_BG_L_P
  2248. /* Use symbols from the runjob.so library provided by IBM.
  2249. * Do NOT use debugger symbols local to the srun command */
  2250. return false;
  2251. #else
  2252. return (MPIR_being_debugged != 0);
  2253. #endif
  2254. }
  /*
   * _usage - print the brief srun usage synopsis to stdout.
   *
   * Blue Gene specific options are listed only when HAVE_BG is defined; the
   * image options shown depend on HAVE_BGL.
   */
  static void _usage(void)
  {
      printf(
  "Usage: srun [-N nnodes] [-n ntasks] [-i in] [-o out] [-e err]\n"
  " [-c ncpus] [-r n] [-p partition] [--hold] [-t minutes]\n"
  " [-D path] [--immediate[=secs]] [--overcommit] [--no-kill]\n"
  " [--share] [--label] [--unbuffered] [-m dist] [-J jobname]\n"
  " [--jobid=id] [--verbose] [--slurmd-debug=#] [--gres=list]\n"
  " [-T threads] [-W sec] [--checkpoint=time]\n"
  " [--checkpoint-dir=dir] [--licenses=names]\n"
  " [--restart-dir=dir] [--qos=qos] [--time-min=minutes]\n"
  " [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n"
  " [--mpi=type] [--account=name] [--dependency=type:jobid]\n"
  " [--launch-cmd] [--launcher-opts=options]\n"
  " [--kill-on-bad-exit] [--propagate[=rlimits]] [--comment=name]\n"
  " [--cpu_bind=...] [--mem_bind=...] [--network=type]\n"
  " [--ntasks-per-node=n] [--ntasks-per-socket=n] [--reservation=name]\n"
  " [--ntasks-per-core=n] [--mem-per-cpu=MB] [--preserve-env]\n"
  #ifdef HAVE_BG /* Blue gene specific options */
  #ifdef HAVE_BG_L_P
  " [--geometry=XxYxZ] "
  #else
  " [--geometry=AxXxYxZ] "
  #endif
  "[--conn-type=type] [--no-rotate] [--reboot]\n"
  #ifdef HAVE_BGL
  " [--blrts-image=path] [--linux-image=path]\n"
  " [--mloader-image=path] [--ramdisk-image=path]\n"
  #else
  " [--cnload-image=path]\n"
  " [--mloader-image=path] [--ioload-image=path]\n"
  #endif
  #endif
  " [--mail-type=type] [--mail-user=user] [--nice[=value]]\n"
  " [--prolog=fname] [--epilog=fname]\n"
  " [--task-prolog=fname] [--task-epilog=fname]\n"
  " [--ctrl-comm-ifhn=addr] [--multi-prog]\n"
  " [--switches=max-switches{@max-time-to-wait}]\n"
  " [-w hosts...] [-x hosts...] executable [args...]\n");
  }
  2295. static void _help(void)
  2296. {
  2297. slurm_ctl_conf_t *conf;
  2298. printf (
  2299. "Usage: srun [OPTIONS...] executable [args...]\n"
  2300. "\n"
  2301. "Parallel run options:\n"
  2302. " -A, --account=name charge job to specified account\n"
  2303. " --begin=time defer job until HH:MM MM/DD/YY\n"
  2304. " -c, --cpus-per-task=ncpus number of cpus required per task\n"
  2305. " --checkpoint=time job step checkpoint interval\n"
  2306. " --checkpoint-dir=dir directory to store job step checkpoint image \n"
  2307. " files\n"
  2308. " --comment=name arbitrary comment\n"
  2309. " -d, --dependency=type:jobid defer job until condition on jobid is satisfied\n"
  2310. " -D, --chdir=path change remote current working directory\n"
  2311. " -e, --error=err location of stderr redirection\n"
  2312. " --epilog=program run \"program\" after launching job step\n"
  2313. " -E, --preserve-env env vars for node and task counts override\n"
  2314. " command-line flags\n"
  2315. " --get-user-env used by Moab. See srun man page.\n"
  2316. " --gres=list required generic resources\n"
  2317. " -H, --hold submit job in held state\n"
  2318. " -i, --input=in location of stdin redirection\n"
  2319. " -I, --immediate[=secs] exit if resources not available in \"secs\"\n"
  2320. " --jobid=id run under already allocated job\n"
  2321. " -J, --job-name=jobname name of job\n"
  2322. " -k, --no-kill do not kill job on node failure\n"
  2323. " -K, --kill-on-bad-exit kill the job if any task terminates with a\n"
  2324. " non-zero exit code\n"
  2325. " -l, --label prepend task number to lines of stdout/err\n"
  2326. " -L, --licenses=names required license, comma separated\n"
  2327. " --launch-cmd print external launcher command line if not SLURM\n"
  2328. " --launcher-opts= options for the external launcher command if not SLURM\n"
  2329. " -m, --distribution=type distribution method for processes to nodes\n"
  2330. " (type = block|cyclic|arbitrary)\n"
  2331. " --mail-type=type notify on state change: BEGIN, END, FAIL or ALL\n"
  2332. " --mail-user=user who to send email notification for job state\n"
  2333. " changes\n"
  2334. " --mpi=type type of MPI being used\n"
  2335. " --multi-prog if set the program name specified is the\n"
  2336. " configuration specification for multiple programs\n"
  2337. " -n, --ntasks=ntasks number of tasks to run\n"
  2338. " --nice[=value] decrease scheduling priority by value\n"
  2339. " --ntasks-per-node=n number of tasks to invoke on each node\n"
  2340. " -N, --nodes=N number of nodes on which to run (N = min[-max])\n"
  2341. " -o, --output=out location of stdout redirection\n"
  2342. " -O, --overcommit overcommit resources\n"
  2343. " -p, --partition=partition partition requested\n"
  2344. " --prolog=program run \"program\" before launching job step\n"
  2345. " --propagate[=rlimits] propagate all [or specific list of] rlimits\n"
  2346. #ifdef HAVE_PTY_H
  2347. " --pty run task zero in pseudo terminal\n"
  2348. #endif
  2349. " -q, --quit-on-interrupt quit on single Ctrl-C\n"
  2350. " --qos=qos quality of service\n"
  2351. " -Q, --quiet quiet mode (suppress informational messages)\n"
  2352. " -r, --relative=n run job step relative to node n of allocation\n"
  2353. " --restart-dir=dir directory of checkpoint image files to restart\n"
  2354. " from\n"
  2355. " -s, --share share nodes with other jobs\n"
  2356. " --slurmd-debug=level slurmd debug level\n"
  2357. " --task-epilog=program run \"program\" after launching task\n"
  2358. " --task-prolog=program run \"program\" before launching task\n"
  2359. " -T, --threads=threads set srun launch fanout\n"
  2360. " -t, --time=minutes time limit\n"
  2361. " --time-min=minutes minimum time limit (if distinct)\n"
  2362. " -u, --unbuffered do not line-buffer stdout/err\n"
  2363. " -v, --verbose verbose mode (multiple -v's increase verbosity)\n"
  2364. " -W, --wait=sec seconds to wait after first task exits\n"
  2365. " before killing job\n"
  2366. " -X, --disable-status Disable Ctrl-C status feature\n"
  2367. " --switches=max-switches{@max-time-to-wait}\n"
  2368. " Optimum switches and max time to wait for optimum\n"
  2369. "\n"
  2370. "Constraint options:\n"
  2371. " --contiguous demand a contiguous range of nodes\n"
  2372. " -C, --constraint=list specify a list of constraints\n"
  2373. " --mem=MB minimum amount of real memory\n"
  2374. " --mincpus=n minimum number of logical processors (threads) per node\n"
  2375. " --reservation=name allocate resources from named reservation\n"
  2376. " --tmp=MB minimum amount of temporary disk\n"
  2377. " -w, --nodelist=hosts... request a specific list of hosts\n"
  2378. " -x, --exclude=hosts... exclude a specific list of hosts\n"
  2379. " -Z, --no-allocate don't allocate nodes (must supply -w)\n"
  2380. "\n"
  2381. "Consumable resources related options:\n"
  2382. " --exclusive allocate nodes in exclusive mode when\n"
  2383. " cpu consumable resource is enabled\n"
  2384. " or don't share CPUs for job steps\n"
  2385. " --mem-per-cpu=MB maximum amount of real memory per allocated\n"
  2386. " cpu required by the job.\n"
  2387. " --mem >= --mem-per-cpu if --mem is specified.\n"
  2388. " --resv-ports reserve communication ports\n"
  2389. "\n"
  2390. "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n"
  2391. " -B, --extra-node-info=S[:C[:T]] Expands to:\n"
  2392. " --sockets-per-node=S number of sockets per node to allocate\n"
  2393. " --cores-per-socket=C number of cores per socket to allocate\n"
  2394. " --threads-per-core=T number of threads per core to allocate\n"
  2395. " each field can be 'min' or wildcard '*'\n"
  2396. " total cpus requested = (N x S x C x T)\n"
  2397. "\n"
  2398. " --ntasks-per-core=n number of tasks to invoke on each core\n"
  2399. " --ntasks-per-socket=n number of tasks to invoke on each socket\n");
  2400. conf = slurm_conf_lock();
  2401. if (conf->task_plugin != NULL
  2402. && strcasecmp(conf->task_plugin, "task/affinity") == 0) {
  2403. printf(
  2404. " --cpu_bind= Bind tasks to CPUs\n"
  2405. " (see \"--cpu_bind=help\" for options)\n"
  2406. " --hint= Bind tasks according to application hints\n"
  2407. " (see \"--hint=help\" for options)\n"
  2408. " --mem_bind= Bind memory to locality domains (ldom)\n"
  2409. " (see \"--mem_bind=help\" for options)\n");
  2410. }
  2411. slurm_conf_unlock();
  2412. spank_print_options(stdout, 6, 30);
  2413. printf("\n"
  2414. #if defined HAVE_AIX || defined HAVE_LIBNRT /* IBM PE specific options */
  2415. "PE related options:\n"
  2416. " --network=type communication protocol to be used\n"
  2417. "\n"
  2418. #endif
  2419. #ifdef HAVE_BG /* Blue gene specific options */
  2420. "Blue Gene related options:\n"
  2421. #ifdef HAVE_BG_L_P
  2422. " -g, --geometry=XxYxZ geometry constraints of the job\n"
  2423. #else
  2424. " -g, --geometry=AxXxYxZ Midplane geometry constraints of the job,\n"
  2425. " sub-block allocations can not be allocated\n"
  2426. " with the geometry option\n"
  2427. #endif
  2428. " -R, --no-rotate disable geometry rotation\n"
  2429. " --reboot reboot block before starting job\n"
  2430. " --conn-type=type constraint on type of connection, MESH or TORUS\n"
  2431. " if not set, then tries to fit TORUS else MESH\n"
  2432. #ifndef HAVE_BGL
  2433. " If wanting to run in HTC mode (only for 1\n"
  2434. " midplane and below). You can use HTC_S for\n"
  2435. " SMP, HTC_D for Dual, HTC_V for\n"
  2436. " virtual node mode, and HTC_L for Linux mode.\n"
  2437. " --cnload-image=path path to compute node image for bluegene block. Default if not set\n"
  2438. " --mloader-image=path path to mloader image for bluegene block. Default if not set\n"
  2439. " --ioload-image=path path to ioload image for bluegene block. Default if not set\n"
  2440. #else
  2441. " --blrts-image=path path to blrts image for bluegene block. Default if not set\n"
  2442. " --linux-image=path path to linux image for bluegene block. Default if not set\n"
  2443. " --mloader-image=path path to mloader image for bluegene block. Default if not set\n"
  2444. " --ramdisk-image=path path to ramdisk image for bluegene block. Default if not set\n"
  2445. #endif
  2446. #endif
  2447. "\n"
  2448. "Help options:\n"
  2449. " -h, --help show this help message\n"
  2450. " --usage display brief usage message\n"
  2451. "\n"
  2452. "Other options:\n"
  2453. " -V, --version output version information and exit\n"
  2454. "\n"
  2455. );
  2456. }