
/src/common/read_config.c

https://github.com/cfenoy/slurm
  1. /*****************************************************************************\
  2. * read_config.c - read the overall slurm configuration file
  3. *****************************************************************************
  4. * Copyright (C) 2002-2007 The Regents of the University of California.
  5. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  6. * Portions Copyright (C) 2008 Vijay Ramasubramanian.
  7. * Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>.
  8. * Portions (boards) copyright (C) 2012 Bull, <rod.schultz@bull.com>
  9. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  10. * Written by Morris Jette <jette1@llnl.gov>.
  11. * CODE-OCEC-09-009. All rights reserved.
  12. *
  13. * This file is part of SLURM, a resource management program.
  14. * For details, see <http://www.schedmd.com/slurmdocs/>.
  15. * Please also read the included file: DISCLAIMER.
  16. *
  17. * SLURM is free software; you can redistribute it and/or modify it under
  18. * the terms of the GNU General Public License as published by the Free
  19. * Software Foundation; either version 2 of the License, or (at your option)
  20. * any later version.
  21. *
  22. * In addition, as a special exception, the copyright holders give permission
  23. * to link the code of portions of this program with the OpenSSL library under
  24. * certain conditions as described in each individual source file, and
  25. * distribute linked combinations including the two. You must obey the GNU
  26. * General Public License in all respects for all of the code used other than
  27. * OpenSSL. If you modify file(s) with this exception, you may extend this
  28. * exception to your version of the file(s), but you are not obligated to do
  29. * so. If you do not wish to do so, delete this exception statement from your
  30. * version. If you delete this exception statement from all source files in
  31. * the program, then also delete it here.
  32. *
  33. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  34. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  35. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  36. * details.
  37. *
  38. * You should have received a copy of the GNU General Public License along
  39. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  40. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  41. \*****************************************************************************/
  42. #ifdef HAVE_CONFIG_H
  43. # include "config.h"
  44. #endif
  45. #include <arpa/inet.h>
  46. #include <assert.h>
  47. #include <ctype.h>
  48. #include <errno.h>
  49. #include <limits.h>
  50. #include <netdb.h>
  51. #include <netinet/in.h>
  52. #include <pthread.h>
  53. #include <pwd.h>
  54. #include <stdio.h>
  55. #include <stdlib.h>
  56. #include <string.h>
  57. #include <sys/socket.h>
  58. #include <sys/stat.h>
  59. #include <sys/types.h>
  60. #include <time.h>
  61. #include <unistd.h>
  62. #include "slurm/slurm.h"
  63. #include "src/common/hostlist.h"
  64. #include "src/common/log.h"
  65. #include "src/common/macros.h"
  66. #include "src/common/node_conf.h"
  67. #include "src/common/parse_config.h"
  68. #include "src/common/parse_spec.h"
  69. #include "src/common/parse_time.h"
  70. #include "src/common/read_config.h"
  71. #include "src/common/slurm_accounting_storage.h"
  72. #include "src/common/slurm_protocol_api.h"
  73. #include "src/common/slurm_protocol_defs.h"
  74. #include "src/common/slurm_rlimits_info.h"
  75. #include "src/common/slurm_selecttype_info.h"
  76. #include "src/common/strlcpy.h"
  77. #include "src/common/uid.h"
  78. #include "src/common/util-net.h"
  79. #include "src/common/xmalloc.h"
  80. #include "src/common/xstring.h"
  81. /*
  82. ** Define slurm-specific aliases for use by plugins, see slurm_xlator.h
  83. ** for details.
  84. */
  85. strong_alias(destroy_config_key_pair, slurm_destroy_config_key_pair);
  86. strong_alias(sort_key_pairs, slurm_sort_key_pairs);
87. /* Instantiation of the "extern slurm_ctl_conf_t slurmctld_conf"
  88. * found in slurmctld.h */
  89. slurm_ctl_conf_t slurmctld_conf;
  90. static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;
  91. static s_p_hashtbl_t *conf_hashtbl = NULL;
  92. static slurm_ctl_conf_t *conf_ptr = &slurmctld_conf;
  93. static bool conf_initialized = false;
  94. static s_p_hashtbl_t *default_frontend_tbl;
  95. static s_p_hashtbl_t *default_nodename_tbl;
  96. static s_p_hashtbl_t *default_partition_tbl;
  97. inline static void _normalize_debug_level(uint16_t *level);
  98. static void _init_slurm_conf(const char *file_name);
  99. #define NAME_HASH_LEN 512
  100. typedef struct names_ll_s {
  101. char *alias; /* NodeName */
  102. char *hostname; /* NodeHostname */
  103. char *address; /* NodeAddr */
  104. uint16_t port;
  105. uint16_t cpus;
  106. uint16_t boards;
  107. uint16_t sockets;
  108. uint16_t cores;
  109. uint16_t threads;
  110. slurm_addr_t addr;
  111. bool addr_initialized;
  112. struct names_ll_s *next_alias;
  113. struct names_ll_s *next_hostname;
  114. } names_ll_t;
  115. static bool nodehash_initialized = false;
  116. static names_ll_t *host_to_node_hashtbl[NAME_HASH_LEN] = {NULL};
  117. static names_ll_t *node_to_host_hashtbl[NAME_HASH_LEN] = {NULL};
  118. static void _destroy_nodename(void *ptr);
  119. static int _parse_frontend(void **dest, slurm_parser_enum_t type,
  120. const char *key, const char *value,
  121. const char *line, char **leftover);
  122. static int _parse_nodename(void **dest, slurm_parser_enum_t type,
  123. const char *key, const char *value,
  124. const char *line, char **leftover);
  125. static bool _is_valid_path(char *path, char *msg);
  126. static int _parse_partitionname(void **dest, slurm_parser_enum_t type,
  127. const char *key, const char *value,
  128. const char *line, char **leftover);
  129. static void _destroy_partitionname(void *ptr);
  130. static int _parse_downnodes(void **dest, slurm_parser_enum_t type,
  131. const char *key, const char *value,
  132. const char *line, char **leftover);
  133. static void _destroy_downnodes(void *ptr);
  134. static int _defunct_option(void **dest, slurm_parser_enum_t type,
  135. const char *key, const char *value,
  136. const char *line, char **leftover);
  137. static void _validate_and_set_defaults(slurm_ctl_conf_t *conf,
  138. s_p_hashtbl_t *hashtbl);
  139. s_p_options_t slurm_conf_options[] = {
  140. {"AccountingStorageEnforce", S_P_STRING},
  141. {"AccountingStorageHost", S_P_STRING},
  142. {"AccountingStorageBackupHost", S_P_STRING},
  143. {"AccountingStorageLoc", S_P_STRING},
  144. {"AccountingStoragePass", S_P_STRING},
  145. {"AccountingStoragePort", S_P_UINT32},
  146. {"AccountingStorageType", S_P_STRING},
  147. {"AccountingStorageUser", S_P_STRING},
  148. {"AccountingStoreJobComment", S_P_BOOLEAN},
  149. {"AcctGatherEnergyType", S_P_STRING},
  150. {"AcctGatherNodeFreq", S_P_UINT16},
  151. {"AuthType", S_P_STRING},
  152. {"BackupAddr", S_P_STRING},
  153. {"BackupController", S_P_STRING},
  154. {"BatchStartTimeout", S_P_UINT16},
  155. {"CheckpointType", S_P_STRING},
  156. {"CacheGroups", S_P_UINT16},
  157. {"ClusterName", S_P_STRING},
  158. {"CompleteWait", S_P_UINT16},
  159. {"ControlAddr", S_P_STRING},
  160. {"ControlMachine", S_P_STRING},
  161. {"CryptoType", S_P_STRING},
  162. {"DebugFlags", S_P_STRING},
  163. {"DefaultStorageHost", S_P_STRING},
  164. {"DefaultStorageLoc", S_P_STRING},
  165. {"DefaultStoragePass", S_P_STRING},
  166. {"DefaultStoragePort", S_P_UINT32},
  167. {"DefaultStorageType", S_P_STRING},
  168. {"DefaultStorageUser", S_P_STRING},
  169. {"DefMemPerCPU", S_P_UINT32},
  170. {"DefMemPerNode", S_P_UINT32},
  171. {"DisableRootJobs", S_P_BOOLEAN},
  172. {"EnforcePartLimits", S_P_BOOLEAN},
  173. {"Epilog", S_P_STRING},
  174. {"EpilogMsgTime", S_P_UINT32},
  175. {"EpilogSlurmctld", S_P_STRING},
  176. {"FastSchedule", S_P_UINT16},
  177. {"FirstJobId", S_P_UINT32},
  178. {"GetEnvTimeout", S_P_UINT16},
  179. {"GresTypes", S_P_STRING},
  180. {"GroupUpdateForce", S_P_UINT16},
  181. {"GroupUpdateTime", S_P_UINT16},
  182. {"HealthCheckInterval", S_P_UINT16},
  183. {"HealthCheckProgram", S_P_STRING},
  184. {"InactiveLimit", S_P_UINT16},
  185. {"JobAcctGatherType", S_P_STRING},
  186. {"JobAcctGatherFrequency", S_P_UINT16},
  187. {"JobCheckpointDir", S_P_STRING},
  188. {"JobCompHost", S_P_STRING},
  189. {"JobCompLoc", S_P_STRING},
  190. {"JobCompPass", S_P_STRING},
  191. {"JobCompPort", S_P_UINT32},
  192. {"JobCompType", S_P_STRING},
  193. {"JobCompUser", S_P_STRING},
  194. {"JobCredentialPrivateKey", S_P_STRING},
  195. {"JobCredentialPublicCertificate", S_P_STRING},
  196. {"JobFileAppend", S_P_UINT16},
  197. {"JobRequeue", S_P_UINT16},
  198. {"JobSubmitPlugins", S_P_STRING},
  199. {"KillOnBadExit", S_P_UINT16},
  200. {"KillWait", S_P_UINT16},
  201. {"LaunchType", S_P_STRING},
  202. {"Licenses", S_P_STRING},
  203. {"MailProg", S_P_STRING},
  204. {"MaxJobCount", S_P_UINT32},
  205. {"MaxJobId", S_P_UINT32},
  206. {"MaxMemPerCPU", S_P_UINT32},
  207. {"MaxMemPerNode", S_P_UINT32},
  208. {"MaxStepCount", S_P_UINT32},
  209. {"MaxTasksPerNode", S_P_UINT16},
  210. {"MessageTimeout", S_P_UINT16},
  211. {"MinJobAge", S_P_UINT16},
  212. {"MpiDefault", S_P_STRING},
  213. {"MpiParams", S_P_STRING},
  214. {"OverTimeLimit", S_P_UINT16},
  215. {"PluginDir", S_P_STRING},
  216. {"PlugStackConfig", S_P_STRING},
  217. {"PreemptMode", S_P_STRING},
  218. {"PreemptType", S_P_STRING},
  219. {"PriorityDecayHalfLife", S_P_STRING},
  220. {"PriorityCalcPeriod", S_P_STRING},
  221. {"PriorityFavorSmall", S_P_BOOLEAN},
  222. {"PriorityMaxAge", S_P_STRING},
  223. {"PriorityUsageResetPeriod", S_P_STRING},
  224. {"PriorityType", S_P_STRING},
  225. {"PriorityFlags", S_P_STRING},
  226. {"PriorityWeightAge", S_P_UINT32},
  227. {"PriorityWeightFairshare", S_P_UINT32},
  228. {"PriorityWeightJobSize", S_P_UINT32},
  229. {"PriorityWeightPartition", S_P_UINT32},
  230. {"PriorityWeightQOS", S_P_UINT32},
  231. {"PrivateData", S_P_STRING},
  232. {"ProctrackType", S_P_STRING},
  233. {"Prolog", S_P_STRING},
  234. {"PrologSlurmctld", S_P_STRING},
  235. {"PropagatePrioProcess", S_P_UINT16},
  236. {"PropagateResourceLimitsExcept", S_P_STRING},
  237. {"PropagateResourceLimits", S_P_STRING},
  238. {"RebootProgram", S_P_STRING},
  239. {"ReconfigFlags", S_P_STRING},
  240. {"ResumeProgram", S_P_STRING},
  241. {"ResumeRate", S_P_UINT16},
  242. {"ResumeTimeout", S_P_UINT16},
  243. {"ResvOverRun", S_P_UINT16},
  244. {"ReturnToService", S_P_UINT16},
  245. {"SallocDefaultCommand", S_P_STRING},
  246. {"SchedulerAuth", S_P_STRING, _defunct_option},
  247. {"SchedulerParameters", S_P_STRING},
  248. {"SchedulerPort", S_P_UINT16},
  249. {"SchedulerRootFilter", S_P_UINT16},
  250. {"SchedulerTimeSlice", S_P_UINT16},
  251. {"SchedulerType", S_P_STRING},
  252. {"SelectType", S_P_STRING},
  253. {"SelectTypeParameters", S_P_STRING},
  254. {"SlurmUser", S_P_STRING},
  255. {"SlurmdUser", S_P_STRING},
  256. {"SlurmctldDebug", S_P_STRING},
  257. {"SlurmctldLogFile", S_P_STRING},
  258. {"SlurmctldPidFile", S_P_STRING},
  259. {"SlurmctldPort", S_P_STRING},
  260. {"SlurmctldTimeout", S_P_UINT16},
  261. {"SlurmdDebug", S_P_STRING},
  262. {"SlurmdLogFile", S_P_STRING},
  263. {"SlurmdPidFile", S_P_STRING},
  264. {"SlurmdPort", S_P_UINT32},
  265. {"SlurmdSpoolDir", S_P_STRING},
  266. {"SlurmdTimeout", S_P_UINT16},
  267. {"SlurmSchedLogFile", S_P_STRING},
  268. {"SlurmSchedLogLevel", S_P_UINT16},
  269. {"SrunEpilog", S_P_STRING},
  270. {"SrunProlog", S_P_STRING},
  271. {"StateSaveLocation", S_P_STRING},
  272. {"SuspendExcNodes", S_P_STRING},
  273. {"SuspendExcParts", S_P_STRING},
  274. {"SuspendProgram", S_P_STRING},
  275. {"SuspendRate", S_P_UINT16},
  276. {"SuspendTime", S_P_LONG},
  277. {"SuspendTimeout", S_P_UINT16},
  278. {"SwitchType", S_P_STRING},
  279. {"TaskEpilog", S_P_STRING},
  280. {"TaskProlog", S_P_STRING},
  281. {"TaskPlugin", S_P_STRING},
  282. {"TaskPluginParam", S_P_STRING},
  283. {"TmpFS", S_P_STRING},
  284. {"TopologyPlugin", S_P_STRING},
  285. {"TrackWCKey", S_P_BOOLEAN},
  286. {"TreeWidth", S_P_UINT16},
  287. {"UnkillableStepProgram", S_P_STRING},
  288. {"UnkillableStepTimeout", S_P_UINT16},
  289. {"UsePAM", S_P_BOOLEAN},
  290. {"VSizeFactor", S_P_UINT16},
  291. {"WaitTime", S_P_UINT16},
  292. {"FrontendName", S_P_ARRAY, _parse_frontend, destroy_frontend},
  293. {"NodeName", S_P_ARRAY, _parse_nodename, _destroy_nodename},
  294. {"PartitionName", S_P_ARRAY, _parse_partitionname,
  295. _destroy_partitionname},
  296. {"DownNodes", S_P_ARRAY, _parse_downnodes, _destroy_downnodes},
  297. {NULL}
  298. };
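/* Return true if every element of the colon-separated directory list in
 * "path" exists and is a directory; otherwise log an error prefixed with
 * "msg" and return false. */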
  299. static bool _is_valid_path (char *path, char *msg)
  300. {
  301. /*
  302. * Allocate temporary space for walking the list of dirs:
  303. */
  304. int pathlen = strlen (path);
  305. char *buf = xmalloc (pathlen + 2);
  306. char *p, *entry;
  307. if (strlcpy (buf, path, pathlen + 1) > pathlen + 1) {
  308. error ("is_valid_path: Failed to copy path!");
  309. goto out_false;
  310. }
  311. /*
  312. * Ensure the path ends with a ':'
  313. */
  314. if (buf [pathlen - 1] != ':') {
  315. buf [pathlen] = ':';
  316. buf [pathlen + 1] = '\0';
  317. }
  318. entry = buf;
  319. while ((p = strchr (entry, ':'))) {
  320. struct stat st;
  321. /*
  322. * NUL terminate colon and advance p
  323. */
  324. *(p++) = '\0';
  325. /*
  326. * Check to see if current path element is a valid dir
  327. */
  328. if (stat (entry, &st) < 0) {
  329. error ("%s: %s: %m", msg, entry);
  330. goto out_false;
  331. }
  332. else if (!S_ISDIR (st.st_mode)) {
  333. error ("%s: %s: Not a directory", msg, entry);
  334. goto out_false;
  335. }
  336. /*
  337. * Otherwise path element is valid, continue..
  338. */
  339. entry = p;
  340. }
  341. xfree (buf);
  342. return true;
  343. out_false:
  344. xfree (buf);
  345. return false;
  346. }
  347. static int _defunct_option(void **dest, slurm_parser_enum_t type,
  348. const char *key, const char *value,
  349. const char *line, char **leftover)
  350. {
  351. error("The option \"%s\" is defunct, see man slurm.conf.", key);
  352. return 0;
  353. }
  354. #if (SYSTEM_DIMENSIONS > 1)
  355. /* Used to get the general name of the machine, used primarily
356. * for BlueGene systems. Not in general use because some systems
357. * have multiple prefixes, such as foo[1-1000],bar[1-1000].
  358. */
  359. /* Caller must be holding slurm_conf_lock() */
  360. static void _set_node_prefix(const char *nodenames)
  361. {
  362. int i;
  363. char *tmp;
  364. xassert(nodenames != NULL);
  365. for (i = 1; nodenames[i] != '\0'; i++) {
  366. if((nodenames[i-1] == '[')
  367. || (nodenames[i-1] <= '9'
  368. && nodenames[i-1] >= '0'))
  369. break;
  370. }
  371. if(i == 1) {
  372. error("In your Node definition in your slurm.conf you "
  373. "gave a nodelist '%s' without a prefix. "
  374. "Please try something like bg%s.", nodenames, nodenames);
  375. }
  376. xfree(conf_ptr->node_prefix);
  377. if(nodenames[i] == '\0')
  378. conf_ptr->node_prefix = xstrdup(nodenames);
  379. else {
  380. tmp = xmalloc(sizeof(char)*i+1);
  381. memset(tmp, 0, i+1);
  382. snprintf(tmp, i, "%s", nodenames);
  383. conf_ptr->node_prefix = tmp;
  384. tmp = NULL;
  385. }
  386. debug3("Prefix is %s %s %d", conf_ptr->node_prefix, nodenames, i);
  387. }
  388. #endif /* SYSTEM_DIMENSIONS > 1 */
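/* Parse one FrontendName line. A FrontendName=DEFAULT entry only updates
 * the default front end table and returns 0; otherwise a new
 * slurm_conf_frontend_t is returned through "dest" and 1 is returned,
 * or -1 on error. */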
  389. static int _parse_frontend(void **dest, slurm_parser_enum_t type,
  390. const char *key, const char *value,
  391. const char *line, char **leftover)
  392. {
  393. s_p_hashtbl_t *tbl, *dflt;
  394. slurm_conf_frontend_t *n;
  395. char *node_state = NULL;
  396. static s_p_options_t _frontend_options[] = {
  397. {"FrontendAddr", S_P_STRING},
  398. {"Port", S_P_UINT16},
  399. {"Reason", S_P_STRING},
  400. {"State", S_P_STRING},
  401. {NULL}
  402. };
  403. #ifndef HAVE_FRONT_END
  404. fatal("Use of FrontendName in slurm.conf without SLURM being "
  405. "configured/built with the --enable-front-end option");
  406. #endif
  407. tbl = s_p_hashtbl_create(_frontend_options);
  408. s_p_parse_line(tbl, *leftover, leftover);
  409. /* s_p_dump_values(tbl, _frontend_options); */
  410. if (strcasecmp(value, "DEFAULT") == 0) {
  411. char *tmp;
  412. if (s_p_get_string(&tmp, "FrontendAddr", tbl)) {
  413. error("FrontendAddr not allowed with "
  414. "FrontendName=DEFAULT");
  415. xfree(tmp);
  416. s_p_hashtbl_destroy(tbl);
  417. return -1;
  418. }
  419. if (default_frontend_tbl != NULL) {
  420. s_p_hashtbl_merge(tbl, default_frontend_tbl);
  421. s_p_hashtbl_destroy(default_frontend_tbl);
  422. }
  423. default_frontend_tbl = tbl;
  424. return 0;
  425. } else {
  426. n = xmalloc(sizeof(slurm_conf_frontend_t));
  427. dflt = default_frontend_tbl;
  428. n->frontends = xstrdup(value);
  429. if (!s_p_get_string(&n->addresses, "FrontendAddr", tbl))
  430. n->addresses = xstrdup(n->frontends);
  431. if (!s_p_get_uint16(&n->port, "Port", tbl) &&
  432. !s_p_get_uint16(&n->port, "Port", dflt)) {
  433. /* This gets resolved in slurm_conf_get_port()
  434. * and slurm_conf_get_addr(). For now just
  435. * leave with a value of zero */
  436. n->port = 0;
  437. }
  438. if (!s_p_get_string(&n->reason, "Reason", tbl))
  439. s_p_get_string(&n->reason, "Reason", dflt);
  440. if (!s_p_get_string(&node_state, "State", tbl) &&
  441. !s_p_get_string(&node_state, "State", dflt)) {
  442. n->node_state = NODE_STATE_UNKNOWN;
  443. } else {
  444. n->node_state = state_str2int(node_state,
  445. (char *) value);
  446. if (n->node_state == (uint16_t) NO_VAL)
  447. n->node_state = NODE_STATE_UNKNOWN;
  448. xfree(node_state);
  449. }
  450. *dest = (void *)n;
  451. s_p_hashtbl_destroy(tbl);
  452. return 1;
  453. }
  454. /* should not get here */
  455. }
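/* Parse one NodeName line. NodeName=DEFAULT updates the default node
 * table and returns 0; otherwise a slurm_conf_node_t is built and the
 * Boards, Sockets, SocketsPerBoard, CoresPerSocket, ThreadsPerCore and
 * CPUs values are reconciled so they remain mutually consistent. */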
  456. static int _parse_nodename(void **dest, slurm_parser_enum_t type,
  457. const char *key, const char *value,
  458. const char *line, char **leftover)
  459. {
  460. s_p_hashtbl_t *tbl, *dflt;
  461. slurm_conf_node_t *n;
  462. int computed_procs;
  463. static s_p_options_t _nodename_options[] = {
  464. {"Boards", S_P_UINT16},
  465. {"CoresPerSocket", S_P_UINT16},
  466. {"CPUs", S_P_UINT16},
  467. {"Feature", S_P_STRING},
  468. {"Gres", S_P_STRING},
  469. {"NodeAddr", S_P_STRING},
  470. {"NodeHostname", S_P_STRING},
  471. {"Port", S_P_STRING},
  472. {"Procs", S_P_UINT16},
  473. {"RealMemory", S_P_UINT32},
  474. {"Reason", S_P_STRING},
  475. {"Sockets", S_P_UINT16},
  476. {"SocketsPerBoard", S_P_UINT16},
  477. {"State", S_P_STRING},
  478. {"ThreadsPerCore", S_P_UINT16},
  479. {"TmpDisk", S_P_UINT32},
  480. {"Weight", S_P_UINT32},
  481. {NULL}
  482. };
  483. tbl = s_p_hashtbl_create(_nodename_options);
  484. s_p_parse_line(tbl, *leftover, leftover);
  485. /* s_p_dump_values(tbl, _nodename_options); */
  486. if (strcasecmp(value, "DEFAULT") == 0) {
  487. char *tmp;
  488. if (s_p_get_string(&tmp, "NodeHostname", tbl)) {
  489. error("NodeHostname not allowed with "
  490. "NodeName=DEFAULT");
  491. xfree(tmp);
  492. s_p_hashtbl_destroy(tbl);
  493. return -1;
  494. }
  495. if (s_p_get_string(&tmp, "NodeAddr", tbl)) {
  496. error("NodeAddr not allowed with NodeName=DEFAULT");
  497. xfree(tmp);
  498. s_p_hashtbl_destroy(tbl);
  499. return -1;
  500. }
  501. if (default_nodename_tbl != NULL) {
  502. s_p_hashtbl_merge(tbl, default_nodename_tbl);
  503. s_p_hashtbl_destroy(default_nodename_tbl);
  504. }
  505. default_nodename_tbl = tbl;
  506. return 0;
  507. } else {
  508. bool no_cpus = false;
  509. bool no_boards = false;
  510. bool no_sockets = false;
  511. bool no_cores = false;
  512. bool no_threads = false;
  513. bool no_sockets_per_board = false;
  514. uint16_t sockets_per_board = 0;
  515. n = xmalloc(sizeof(slurm_conf_node_t));
  516. dflt = default_nodename_tbl;
  517. n->nodenames = xstrdup(value);
  518. #if (SYSTEM_DIMENSIONS > 1)
  519. if (conf_ptr->node_prefix == NULL)
  520. _set_node_prefix(n->nodenames);
  521. #endif
  522. if (!s_p_get_string(&n->hostnames, "NodeHostname", tbl))
  523. n->hostnames = xstrdup(n->nodenames);
  524. if (!s_p_get_string(&n->addresses, "NodeAddr", tbl))
  525. n->addresses = xstrdup(n->hostnames);
  526. if (!s_p_get_uint16(&n->boards, "Boards", tbl)
  527. && !s_p_get_uint16(&n->boards, "Boards", dflt)) {
  528. n->boards = 1;
  529. no_boards = true;
  530. }
  531. if (!s_p_get_uint16(&n->cores, "CoresPerSocket", tbl)
  532. && !s_p_get_uint16(&n->cores, "CoresPerSocket", dflt)) {
  533. n->cores = 1;
  534. no_cores = true;
  535. }
  536. if (!s_p_get_string(&n->feature, "Feature", tbl))
  537. s_p_get_string(&n->feature, "Feature", dflt);
  538. if (!s_p_get_string(&n->gres, "Gres", tbl))
  539. s_p_get_string(&n->gres, "Gres", dflt);
  540. if (!s_p_get_string(&n->port_str, "Port", tbl) &&
  541. !s_p_get_string(&n->port_str, "Port", dflt)) {
  542. /* This gets resolved in slurm_conf_get_port()
  543. * and slurm_conf_get_addr(). For now just
  544. * leave with a value of NULL */
  545. }
  546. if (!s_p_get_uint16(&n->cpus, "CPUs", tbl) &&
  547. !s_p_get_uint16(&n->cpus, "CPUs", dflt) &&
  548. !s_p_get_uint16(&n->cpus, "Procs", tbl) &&
  549. !s_p_get_uint16(&n->cpus, "Procs", dflt)) {
  550. n->cpus = 1;
  551. no_cpus = true;
  552. }
  553. if (!s_p_get_uint32(&n->real_memory, "RealMemory", tbl)
  554. && !s_p_get_uint32(&n->real_memory, "RealMemory", dflt))
  555. n->real_memory = 1;
  556. if (!s_p_get_string(&n->reason, "Reason", tbl))
  557. s_p_get_string(&n->reason, "Reason", dflt);
  558. if (!s_p_get_uint16(&n->sockets, "Sockets", tbl)
  559. && !s_p_get_uint16(&n->sockets, "Sockets", dflt)) {
  560. n->sockets = 1;
  561. no_sockets = true;
  562. }
  563. if (!s_p_get_uint16(&sockets_per_board, "SocketsPerBoard", tbl)
  564. && !s_p_get_uint16(&sockets_per_board, "SocketsPerBoard",
  565. dflt)) {
  566. sockets_per_board = 1;
  567. no_sockets_per_board = true;
  568. }
  569. if (!s_p_get_string(&n->state, "State", tbl)
  570. && !s_p_get_string(&n->state, "State", dflt))
  571. n->state = NULL;
  572. if (!s_p_get_uint16(&n->threads, "ThreadsPerCore", tbl)
  573. && !s_p_get_uint16(&n->threads, "ThreadsPerCore", dflt)) {
  574. n->threads = 1;
  575. no_threads = true;
  576. }
  577. if (!s_p_get_uint32(&n->tmp_disk, "TmpDisk", tbl)
  578. && !s_p_get_uint32(&n->tmp_disk, "TmpDisk", dflt))
  579. n->tmp_disk = 0;
  580. if (!s_p_get_uint32(&n->weight, "Weight", tbl)
  581. && !s_p_get_uint32(&n->weight, "Weight", dflt))
  582. n->weight = 1;
  583. s_p_hashtbl_destroy(tbl);
  584. if (n->cores == 0) { /* make sure cores is non-zero */
  585. error("NodeNames=%s CoresPerSocket=0 is invalid, "
  586. "reset to 1", n->nodenames);
  587. n->cores = 1;
  588. }
  589. if (n->threads == 0) { /* make sure threads is non-zero */
  590. error("NodeNames=%s ThreadsPerCore=0 is invalid, "
  591. "reset to 1", n->nodenames);
  592. n->threads = 1;
  593. }
  594. if (!no_sockets_per_board && sockets_per_board==0) {
  595. /* make sure sockets_per_boards is non-zero */
  596. error("NodeNames=%s SocketsPerBoards=0 is invalid, "
  597. "reset to 1", n->nodenames);
  598. sockets_per_board = 1;
  599. }
  600. if (no_boards) {
601. /* This case is exactly as if there were no boards;
602. * SocketsPerBoard=# can still be used,
603. * but it can't be combined with Sockets=# */
  604. n->boards = 1;
  605. if (!no_sockets && !no_sockets_per_board) {
  606. error("NodeNames=%s Sockets=# and "
  607. "SocketsPerBoard=# is invalid"
  608. ", using SocketsPerBoard",
  609. n->nodenames);
  610. n->sockets = sockets_per_board;
  611. }
  612. if (!no_sockets_per_board) {
  613. n->sockets = sockets_per_board;
  614. }
  615. if (!no_cpus && /* infer missing Sockets= */
  616. no_sockets) {
  617. n->sockets = n->cpus / (n->cores * n->threads);
  618. }
  619. if (n->sockets == 0) { /* make sure sockets != 0 */
  620. error("NodeNames=%s Sockets=0 is invalid, "
  621. "reset to 1", n->nodenames);
  622. n->sockets = 1;
  623. }
  624. if (no_cpus) { /* infer missing CPUs= */
  625. n->cpus = n->sockets * n->cores * n->threads;
  626. }
  627. /* if only CPUs= and Sockets=
628. * specified, check for a match */
  629. if (!no_cpus && !no_sockets &&
  630. no_cores && no_threads &&
  631. (n->cpus != n->sockets)) {
  632. n->sockets = n->cpus;
  633. error("NodeNames=%s CPUs doesn't match "
  634. "Sockets, setting Sockets to %d",
  635. n->nodenames, n->sockets);
  636. }
  637. computed_procs = n->sockets * n->cores * n->threads;
  638. if ((n->cpus != n->sockets) &&
  639. (n->cpus != n->sockets * n->cores) &&
  640. (n->cpus != computed_procs)) {
  641. error("NodeNames=%s CPUs=%d doesn't match "
  642. "Sockets*CoresPerSocket*ThreadsPerCore "
  643. "(%d), resetting CPUs",
  644. n->nodenames, n->cpus, computed_procs);
  645. n->cpus = computed_procs;
  646. }
  647. } else {
  648. /* In this case Boards=# is used.
  649. * CPUs=# or Procs=# are ignored.
  650. */
  651. if (!no_cpus) {
  652. error("NodeNames=%s CPUs=# or Procs=# "
  653. "with Boards=# is invalid and "
  654. "is ignored.", n->nodenames);
  655. }
  656. if (n->boards == 0) {
  657. /* make sure boards is non-zero */
  658. error("NodeNames=%s Boards=0 is "
  659. "invalid, reset to 1",
  660. n->nodenames);
  661. n->boards = 1;
  662. }
  663. if (!no_sockets && !no_sockets_per_board) {
  664. error("NodeNames=%s Sockets=# and "
  665. "SocketsPerBoard=# is invalid, "
  666. "using SocketsPerBoard", n->nodenames);
  667. n->sockets = n->boards * sockets_per_board;
  668. } else if (!no_sockets_per_board) {
  669. n->sockets = n->boards * sockets_per_board;
  670. } else if (!no_sockets) {
  671. error("NodeNames=%s Sockets=# with Boards=# is"
  672. " not recommended, assume "
  673. "SocketsPerBoard was meant",
  674. n->nodenames);
  675. if (n->sockets == 0) {
  676. /* make sure sockets is non-zero */
  677. error("NodeNames=%s Sockets=0 is "
  678. "invalid, reset to 1",
  679. n->nodenames);
  680. n->sockets = 1;
  681. }
  682. n->sockets = n->boards * n->sockets;
  683. } else {
  684. n->sockets = n->boards;
  685. }
  686. /* Node boards factored into sockets */
  687. n->cpus = n->sockets * n->cores * n->threads;
  688. }
  689. *dest = (void *)n;
  690. return 1;
  691. }
  692. /* should not get here */
  693. }
  694. /* Destroy a front_end record built by slurm_conf_frontend_array() */
  695. extern void destroy_frontend(void *ptr)
  696. {
  697. slurm_conf_frontend_t *n = (slurm_conf_frontend_t *) ptr;
  698. xfree(n->frontends);
  699. xfree(n->addresses);
  700. xfree(n->reason);
  701. xfree(ptr);
  702. }
  703. /*
  704. * list_find_frontend - find an entry in the front_end list, see list.h for
  705. * documentation
706. * IN key - front end name, or NULL to match any entry
  707. * RET 1 if found, 0 otherwise
  708. */
  709. extern int list_find_frontend (void *front_end_entry, void *key)
  710. {
  711. slurm_conf_frontend_t *front_end_ptr;
  712. if (key == NULL)
  713. return 1;
  714. front_end_ptr = (slurm_conf_frontend_t *) front_end_entry;
  715. if (strcmp(front_end_ptr->frontends, (char *) key) == 0)
  716. return 1;
  717. return 0;
  718. }
  719. static void _destroy_nodename(void *ptr)
  720. {
  721. slurm_conf_node_t *n = (slurm_conf_node_t *)ptr;
  722. xfree(n->addresses);
  723. xfree(n->feature);
  724. xfree(n->hostnames);
  725. xfree(n->gres);
  726. xfree(n->nodenames);
  727. xfree(n->port_str);
  728. xfree(n->reason);
  729. xfree(n->state);
  730. xfree(ptr);
  731. }
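/* Set ptr_array to the FrontendName records from slurm.conf and return
 * their count. On --enable-front-end builds with no FrontendName entries,
 * a single record synthesized from the first NodeName entry is returned
 * instead. */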
  732. int slurm_conf_frontend_array(slurm_conf_frontend_t **ptr_array[])
  733. {
  734. int count;
  735. slurm_conf_frontend_t **ptr;
  736. if (s_p_get_array((void ***)&ptr, &count, "FrontendName",
  737. conf_hashtbl)) {
  738. *ptr_array = ptr;
  739. return count;
  740. } else {
  741. #ifdef HAVE_FRONT_END
  742. /* No FrontendName in slurm.conf. Take the NodeAddr and
  743. * NodeHostName from the first node's record and use that to
  744. * build an equivalent structure to that constructed when
  745. * FrontendName is configured. This is intended for backward
746. * compatibility with SLURM version 2.2. */
  747. static slurm_conf_frontend_t local_front_end;
  748. static slurm_conf_frontend_t *local_front_end_array[2] =
  749. {NULL, NULL};
  750. static char addresses[1024], hostnames[1024];
  751. if (local_front_end_array[0] == NULL) {
  752. slurm_conf_node_t **node_ptr;
  753. int node_count = 0;
  754. if (!s_p_get_array((void ***)&node_ptr, &node_count,
  755. "NodeName", conf_hashtbl) ||
  756. (node_count == 0))
  757. fatal("No front end nodes configured");
  758. strncpy(addresses, node_ptr[0]->addresses,
  759. sizeof(addresses));
  760. strncpy(hostnames, node_ptr[0]->hostnames,
  761. sizeof(hostnames));
  762. local_front_end.addresses = addresses;
  763. local_front_end.frontends = hostnames;
  764. if (node_ptr[0]->port_str) {
  765. local_front_end.port = atoi(node_ptr[0]->
  766. port_str);
  767. }
  768. local_front_end.reason = NULL;
  769. local_front_end.node_state = NODE_STATE_UNKNOWN;
  770. local_front_end_array[0] = &local_front_end;
  771. }
  772. *ptr_array = local_front_end_array;
  773. return 1;
  774. #else
  775. *ptr_array = NULL;
  776. return 0;
  777. #endif
  778. }
  779. }
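/* Set ptr_array to the NodeName records from slurm.conf and return their
 * count (0 if none are defined). */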
  780. int slurm_conf_nodename_array(slurm_conf_node_t **ptr_array[])
  781. {
  782. int count;
  783. slurm_conf_node_t **ptr;
  784. if (s_p_get_array((void ***)&ptr, &count, "NodeName", conf_hashtbl)) {
  785. *ptr_array = ptr;
  786. return count;
  787. } else {
  788. *ptr_array = NULL;
  789. return 0;
  790. }
  791. }
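/* Parse one PartitionName line. PartitionName=DEFAULT merges into the
 * default partition table and returns 0; otherwise a slurm_conf_partition_t
 * is returned through "dest" and 1 is returned, or -1 on a bad value. */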
  792. static int _parse_partitionname(void **dest, slurm_parser_enum_t type,
  793. const char *key, const char *value,
  794. const char *line, char **leftover)
  795. {
  796. s_p_hashtbl_t *tbl, *dflt;
  797. slurm_conf_partition_t *p;
  798. char *tmp = NULL;
  799. static s_p_options_t _partition_options[] = {
  800. {"AllocNodes", S_P_STRING},
  801. {"AllowGroups", S_P_STRING},
  802. {"Alternate", S_P_STRING},
  803. {"DefMemPerCPU", S_P_UINT32},
  804. {"DefMemPerNode", S_P_UINT32},
  805. {"Default", S_P_BOOLEAN}, /* YES or NO */
  806. {"DefaultTime", S_P_STRING},
  807. {"DisableRootJobs", S_P_BOOLEAN}, /* YES or NO */
  808. {"GraceTime", S_P_UINT32},
  809. {"Hidden", S_P_BOOLEAN}, /* YES or NO */
  810. {"MaxMemPerCPU", S_P_UINT32},
  811. {"MaxMemPerNode", S_P_UINT32},
  812. {"MaxTime", S_P_STRING},
  813. {"MaxNodes", S_P_UINT32}, /* INFINITE or a number */
  814. {"MinNodes", S_P_UINT32},
  815. {"Nodes", S_P_STRING},
  816. {"PreemptMode", S_P_STRING},
  817. {"Priority", S_P_UINT16},
  818. {"RootOnly", S_P_BOOLEAN}, /* YES or NO */
  819. {"ReqResv", S_P_BOOLEAN}, /* YES or NO */
  820. {"Shared", S_P_STRING}, /* YES, NO, or FORCE */
  821. {"State", S_P_STRING}, /* UP, DOWN, INACTIVE or DRAIN */
  822. {NULL}
  823. };
  824. tbl = s_p_hashtbl_create(_partition_options);
  825. s_p_parse_line(tbl, *leftover, leftover);
  826. /* s_p_dump_values(tbl, _partition_options); */
  827. if (strcasecmp(value, "DEFAULT") == 0) {
  828. if (default_partition_tbl != NULL) {
  829. s_p_hashtbl_merge(tbl, default_partition_tbl);
  830. s_p_hashtbl_destroy(default_partition_tbl);
  831. }
  832. default_partition_tbl = tbl;
  833. return 0;
  834. } else {
  835. p = xmalloc(sizeof(slurm_conf_partition_t));
  836. dflt = default_partition_tbl;
  837. p->name = xstrdup(value);
  838. if (!s_p_get_string(&p->allow_groups, "AllowGroups", tbl))
  839. s_p_get_string(&p->allow_groups, "AllowGroups", dflt);
  840. if (p->allow_groups && strcasecmp(p->allow_groups, "ALL")==0) {
  841. xfree(p->allow_groups);
  842. p->allow_groups = NULL; /* NULL means allow all */
  843. }
  844. if (!s_p_get_string(&p->allow_alloc_nodes, "AllocNodes", tbl)) {
  845. s_p_get_string(&p->allow_alloc_nodes, "AllocNodes",
  846. dflt);
  847. if (p->allow_alloc_nodes &&
  848. (strcasecmp(p->allow_alloc_nodes, "ALL") == 0)) {
849. /* NULL means to allow all submit nodes */
  850. xfree(p->allow_alloc_nodes);
  851. }
  852. }
  853. if (!s_p_get_string(&p->alternate, "Alternate", tbl))
  854. s_p_get_string(&p->alternate, "Alternate", dflt);
  855. if (!s_p_get_boolean(&p->default_flag, "Default", tbl)
  856. && !s_p_get_boolean(&p->default_flag, "Default", dflt))
  857. p->default_flag = false;
  858. if (!s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerNode",
  859. tbl) &&
  860. !s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerNode",
  861. dflt)) {
  862. if (s_p_get_uint32(&p->def_mem_per_cpu,
  863. "DefMemPerCPU", tbl) ||
  864. s_p_get_uint32(&p->def_mem_per_cpu,
  865. "DefMemPerCPU", dflt)) {
  866. p->def_mem_per_cpu |= MEM_PER_CPU;
  867. } else {
  868. p->def_mem_per_cpu = 0;
  869. }
  870. }
  871. if (!s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerNode",
  872. tbl) &&
  873. !s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerNode",
  874. dflt)) {
  875. if (s_p_get_uint32(&p->max_mem_per_cpu,
  876. "MaxMemPerCPU", tbl) ||
  877. s_p_get_uint32(&p->max_mem_per_cpu,
  878. "MaxMemPerCPU", dflt)) {
  879. p->max_mem_per_cpu |= MEM_PER_CPU;
  880. } else {
  881. p->max_mem_per_cpu = 0;
  882. }
  883. }
  884. if (!s_p_get_boolean((bool *)&p->disable_root_jobs,
  885. "DisableRootJobs", tbl))
  886. p->disable_root_jobs = (uint16_t)NO_VAL;
  887. if (!s_p_get_boolean(&p->hidden_flag, "Hidden", tbl)
  888. && !s_p_get_boolean(&p->hidden_flag, "Hidden", dflt))
  889. p->hidden_flag = false;
  890. if (!s_p_get_string(&tmp, "MaxTime", tbl) &&
  891. !s_p_get_string(&tmp, "MaxTime", dflt))
  892. p->max_time = INFINITE;
  893. else {
  894. int max_time = time_str2mins(tmp);
  895. if ((max_time < 0) && (max_time != INFINITE)) {
  896. error("Bad value \"%s\" for MaxTime", tmp);
  897. _destroy_partitionname(p);
  898. s_p_hashtbl_destroy(tbl);
  899. xfree(tmp);
  900. return -1;
  901. }
  902. p->max_time = max_time;
  903. xfree(tmp);
  904. }
  905. if (!s_p_get_uint32(&p->grace_time, "GraceTime", tbl) &&
  906. !s_p_get_uint32(&p->grace_time, "GraceTime", dflt))
  907. p->grace_time = 0;
  908. if (!s_p_get_string(&tmp, "DefaultTime", tbl) &&
  909. !s_p_get_string(&tmp, "DefaultTime", dflt))
  910. p->default_time = NO_VAL;
  911. else {
  912. int default_time = time_str2mins(tmp);
  913. if ((default_time < 0) && (default_time != INFINITE)) {
  914. error("Bad value \"%s\" for DefaultTime", tmp);
  915. _destroy_partitionname(p);
  916. s_p_hashtbl_destroy(tbl);
  917. xfree(tmp);
  918. return -1;
  919. }
  920. p->default_time = default_time;
  921. xfree(tmp);
  922. }
  923. if (!s_p_get_uint32(&p->max_nodes, "MaxNodes", tbl)
  924. && !s_p_get_uint32(&p->max_nodes, "MaxNodes", dflt))
  925. p->max_nodes = INFINITE;
  926. if (!s_p_get_uint32(&p->min_nodes, "MinNodes", tbl)
  927. && !s_p_get_uint32(&p->min_nodes, "MinNodes", dflt))
  928. p->min_nodes = 1;
  929. if (!s_p_get_string(&p->nodes, "Nodes", tbl)
  930. && !s_p_get_string(&p->nodes, "Nodes", dflt))
  931. p->nodes = NULL;
  932. else {
  933. int i;
  934. for (i=0; p->nodes[i]; i++) {
  935. if (isspace((int)p->nodes[i]))
  936. p->nodes[i] = ',';
  937. }
  938. }
  939. if (!s_p_get_boolean(&p->root_only_flag, "RootOnly", tbl)
  940. && !s_p_get_boolean(&p->root_only_flag, "RootOnly", dflt))
  941. p->root_only_flag = false;
  942. if (!s_p_get_boolean(&p->req_resv_flag, "ReqResv", tbl)
  943. && !s_p_get_boolean(&p->req_resv_flag, "ReqResv", dflt))
  944. p->req_resv_flag = false;
  945. if (s_p_get_string(&tmp, "PreemptMode", tbl) ||
  946. s_p_get_string(&tmp, "PreemptMode", dflt)) {
  947. p->preempt_mode = preempt_mode_num(tmp);
  948. if (p->preempt_mode == (uint16_t) NO_VAL) {
  949. error("Bad value \"%s\" for PreemptMode", tmp);
  950. xfree(tmp);
  951. return -1;
  952. }
  953. xfree(tmp);
  954. } else
  955. p->preempt_mode = (uint16_t) NO_VAL;
  956. if (!s_p_get_uint16(&p->priority, "Priority", tbl) &&
  957. !s_p_get_uint16(&p->priority, "Priority", dflt))
  958. p->priority = 1;
  959. if (s_p_get_string(&tmp, "Shared", tbl) ||
  960. s_p_get_string(&tmp, "Shared", dflt)) {
  961. if (strcasecmp(tmp, "NO") == 0)
  962. p->max_share = 1;
  963. #ifndef HAVE_XCPU
  964. /* Only "Shared=NO" is valid on XCPU systems */
  965. else if (strcasecmp(tmp, "EXCLUSIVE") == 0)
  966. p->max_share = 0;
  967. else if (strncasecmp(tmp, "YES:", 4) == 0) {
  968. int i = strtol(&tmp[4], (char **) NULL, 10);
  969. if (i <= 1) {
  970. error("Ignoring bad Shared value: %s",
  971. tmp);
  972. p->max_share = 1; /* Shared=NO */
  973. } else
  974. p->max_share = i;
  975. } else if (strcasecmp(tmp, "YES") == 0)
  976. p->max_share = 4;
  977. else if (strncasecmp(tmp, "FORCE:", 6) == 0) {
  978. int i = strtol(&tmp[6], (char **) NULL, 10);
  979. if (i < 1) {
  980. error("Ignoring bad Shared value: %s",
  981. tmp);
  982. p->max_share = 1; /* Shared=NO */
  983. } else
  984. p->max_share = i | SHARED_FORCE;
  985. } else if (strcasecmp(tmp, "FORCE") == 0)
  986. p->max_share = 4 | SHARED_FORCE;
  987. #endif
  988. else {
  989. error("Bad value \"%s\" for Shared", tmp);
  990. _destroy_partitionname(p);
  991. s_p_hashtbl_destroy(tbl);
  992. xfree(tmp);
  993. return -1;
  994. }
  995. xfree(tmp);
  996. } else
  997. p->max_share = 1;
  998. if (s_p_get_string(&tmp, "State", tbl) ||
  999. s_p_get_string(&tmp, "State", dflt)) {
  1000. if (strncasecmp(tmp, "DOWN", 4) == 0)
  1001. p->state_up = PARTITION_DOWN;
  1002. else if (strncasecmp(tmp, "UP", 2) == 0)
  1003. p->state_up = PARTITION_UP;
  1004. else if (strncasecmp(tmp, "DRAIN", 5) == 0)
  1005. p->state_up = PARTITION_DRAIN;
  1006. else if (strncasecmp(tmp, "INACTIVE", 8) == 0)
  1007. p->state_up = PARTITION_INACTIVE;
  1008. else {
  1009. error("Bad value \"%s\" for State", tmp);
  1010. _destroy_partitionname(p);
  1011. s_p_hashtbl_destroy(tbl);
  1012. xfree(tmp);
  1013. return -1;
  1014. }
  1015. xfree(tmp);
  1016. } else
  1017. p->state_up = PARTITION_UP;
  1018. s_p_hashtbl_destroy(tbl);
  1019. *dest = (void *)p;
  1020. return 1;
  1021. }
  1022. /* should not get here */
  1023. }
  1024. static void _destroy_partitionname(void *ptr)
  1025. {
  1026. slurm_conf_partition_t *p = (slurm_conf_partition_t *)ptr;
  1027. xfree(p->allow_alloc_nodes);
  1028. xfree(p->allow_groups);
  1029. xfree(p->alternate);
  1030. xfree(p->name);
  1031. xfree(p->nodes);
  1032. xfree(ptr);
  1033. }
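/* Set ptr_array to the PartitionName records from slurm.conf and return
 * their count (0 if none are defined). */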
  1034. int slurm_conf_partition_array(slurm_conf_partition_t **ptr_array[])
  1035. {
  1036. int count;
  1037. slurm_conf_partition_t **ptr;
  1038. if (s_p_get_array((void ***)&ptr, &count, "PartitionName",
  1039. conf_hashtbl)) {
  1040. *ptr_array = ptr;
  1041. return count;
  1042. } else {
  1043. *ptr_array = NULL;
  1044. return 0;
  1045. }
  1046. }
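/* Parse one DownNodes line, recording the node list together with its
 * optional Reason and State values. */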
  1047. static int _parse_downnodes(void **dest, slurm_parser_enum_t type,
  1048. const char *key, const char *value,
  1049. const char *line, char **leftover)
  1050. {
  1051. s_p_hashtbl_t *tbl;
  1052. slurm_conf_downnodes_t *n;
  1053. static s_p_options_t _downnodes_options[] = {
  1054. {"Reason", S_P_STRING},
  1055. {"State", S_P_STRING},
  1056. {NULL}
  1057. };
  1058. tbl = s_p_hashtbl_create(_downnodes_options);
  1059. s_p_parse_line(tbl, *leftover, leftover);
  1060. /* s_p_dump_values(tbl, _downnodes_options); */
1061. n = xmalloc(sizeof(slurm_conf_downnodes_t));
  1062. n->nodenames = xstrdup(value);
  1063. if (!s_p_get_string(&n->reason, "Reason", tbl))
  1064. n->reason = xstrdup("Set in slurm.conf");
  1065. if (!s_p_get_string(&n->state, "State", tbl))
  1066. n->state = NULL;
  1067. s_p_hashtbl_destroy(tbl);
  1068. *dest = (void *)n;
  1069. return 1;
  1070. }
  1071. static void _destroy_downnodes(void *ptr)
  1072. {
  1073. slurm_conf_downnodes_t *n = (slurm_conf_downnodes_t *)ptr;
  1074. xfree(n->nodenames);
  1075. xfree(n->reason);
  1076. xfree(n->state);
  1077. xfree(ptr);
  1078. }
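/* Set ptr_array to the DownNodes records from slurm.conf and return their
 * count (0 if none are defined). */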
  1079. extern int slurm_conf_downnodes_array(slurm_conf_downnodes_t **ptr_array[])
  1080. {
  1081. int count;
  1082. slurm_conf_downnodes_t **ptr;
  1083. if (s_p_get_array((void ***)&ptr, &count, "DownNodes", conf_hashtbl)) {
  1084. *ptr_array = ptr;
  1085. return count;
  1086. } else {
  1087. *ptr_array = NULL;
  1088. return 0;
  1089. }
  1090. }
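/* Release every entry in the NodeName/NodeHostname lookup hash tables. */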
  1091. static void _free_name_hashtbl(void)
  1092. {
  1093. int i;
  1094. names_ll_t *p, *q;
  1095. for (i=0; i<NAME_HASH_LEN; i++) {
  1096. p = node_to_host_hashtbl[i];
  1097. while (p) {
  1098. xfree(p->alias);
  1099. xfree(p->hostname);
  1100. xfree(p->address);
  1101. q = p->next_alias;
  1102. xfree(p);
  1103. p = q;
  1104. }
  1105. node_to_host_hashtbl[i] = NULL;
  1106. host_to_node_hashtbl[i] = NULL;
  1107. }
  1108. nodehash_initialized = false;
  1109. }
  1110. static void _init_name_hashtbl(void)
  1111. {
  1112. return;
  1113. }
  1114. static int _get_hash_idx(const char *name)
  1115. {
  1116. int index = 0;
  1117. int j;
  1118. if (name == NULL)
  1119. return 0; /* degenerate case */
  1120. /* Multiply each character by its numerical position in the
  1121. * name string to add a bit of entropy, because host names such
  1122. * as cluster[0001-1000] can cause excessive index collisions.
  1123. */
  1124. for (j = 1; *name; name++, j++)
  1125. index += (int)*name * j;
  1126. index %= NAME_HASH_LEN;
  1127. if (index < 0)
  1128. index += NAME_HASH_LEN;
  1129. return index;
  1130. }
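/* Add one alias/hostname/address record to the end of both lookup hash
 * chains, rejecting duplicate NodeName entries (and duplicate NodeHostname
 * entries on builds without front end or multiple slurmd support). */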
  1131. static void _push_to_hashtbls(char *alias, char *hostname,
  1132. char *address, uint16_t port,
  1133. uint16_t cpus, uint16_t boards,
  1134. uint16_t sockets, uint16_t cores,
  1135. uint16_t threads, bool front_end)
  1136. {
  1137. int hostname_idx, alias_idx;
  1138. names_ll_t *p, *new;
  1139. alias_idx = _get_hash_idx(alias);
  1140. hostname_idx = _get_hash_idx(hostname);
  1141. #if !defined(HAVE_FRONT_END) && !defined(MULTIPLE_SLURMD)
1142. /* Ensure only one slurmd is configured on each host */
  1143. p = host_to_node_hashtbl[hostname_idx];
  1144. while (p) {
  1145. if (strcmp(p->hostname, hostname) == 0) {
  1146. error("Duplicated NodeHostName %s in the config file",
  1147. hostname);
  1148. return;
  1149. }
  1150. p = p->next_hostname;
  1151. }
  1152. #endif
  1153. /* Ensure only one instance of each NodeName */
  1154. p = node_to_host_hashtbl[alias_idx];
  1155. while (p) {
  1156. if (strcmp(p->alias, alias)==0) {
  1157. if (front_end)
  1158. fatal("Frontend not configured correctly "
  1159. "in slurm.conf. See man slurm.conf "
  1160. "look for frontendname.");
  1161. fatal("Duplicated NodeName %s in the config file",
  1162. p->alias);
  1163. return;
  1164. }
  1165. p = p->next_alias;
  1166. }
  1167. /* Create the new data structure and link it into the hash tables */
  1168. new = (names_ll_t *)xmalloc(sizeof(names_ll_t));
  1169. new->alias = xstrdup(alias);
  1170. new->hostname = xstrdup(hostname);
  1171. new->address = xstrdup(address);
  1172. new->port = port;
  1173. new->cpus = cpus;
  1174. new->boards = boards;
  1175. new->sockets = sockets;
  1176. new->cores = cores;
  1177. new->threads = threads;
  1178. new->addr_initialized = false;
  1179. /* Put on end of each list */
  1180. new->next_alias = NULL;
  1181. if (node_to_host_hashtbl[alias_idx]) {
  1182. p = node_to_host_hashtbl[alias_idx];
  1183. while (p->next_alias)
  1184. p = p->next_alias;
  1185. p->next_alias = new;
  1186. } else {
  1187. node_to_host_hashtbl[alias_idx] = new;
  1188. }
  1189. new->next_hostname = NULL;
  1190. if (host_to_node_hashtbl[hostname_idx]) {
  1191. p = host_to_node_hashtbl[hostname_idx];
  1192. while (p->next_hostname)
  1193. p = p->next_hostname;
  1194. p->next_hostname = new;
  1195. } else {
  1196. host_to_node_hashtbl[hostname_idx] = new;
  1197. }
  1198. }
  1199. /*
1200. * Register the NodeName, NodeHostname, NodeAddr and Port values from
1201. * the given node record in the alias lookup hash tables, expanding
1202. * host list ranges and sanity checking the element counts.
  1203. */
  1204. static int _register_conf_node_aliases(slurm_conf_node_t *node_ptr)
  1205. {
  1206. hostlist_t address_list = NULL;
  1207. hostlist_t alias_list = NULL;
  1208. hostlist_t hostname_list = NULL;
  1209. hostlist_t port_list = NULL;
  1210. char *address = NULL;
  1211. char *alias = NULL;
  1212. char *hostname = NULL;
  1213. char *port_str = NULL;
  1214. int error_code = SLURM_SUCCESS;
  1215. int address_count, alias_count, hostname_count, port_count, port_int;
  1216. uint16_t port = 0;
  1217. if ((node_ptr->nodenames == NULL) || (node_ptr->nodenames[0] == '\0'))
  1218. return -1;
  1219. if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) {
  1220. error("Unable to create NodeAddr list from %s",
  1221. node_ptr->addresses);
  1222. error_code = errno;
  1223. goto cleanup;
  1224. }
  1225. if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) {
  1226. error("Unable to create NodeName list from %s",
  1227. node_ptr->nodenames);
  1228. error_code = errno;
  1229. goto cleanup;
  1230. }
  1231. if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) {
  1232. error("Unable to create NodeHostname list from %s",
  1233. node_ptr->hostnames);
  1234. error_code = errno;
  1235. goto cleanup;
  1236. }
  1237. if (node_ptr->port_str && node_ptr->port_str[0] &&
  1238. (node_ptr->port_str[0] != '[') &&
  1239. (strchr(node_ptr->port_str, '-') ||
  1240. strchr(node_ptr->port_str, ','))) {
  1241. xstrfmtcat(port_str, "[%s]", node_ptr->port_str);
  1242. port_list = hostlist_create(port_str);
  1243. xfree(port_str);
  1244. } else {
  1245. port_list = hostlist_create(node_ptr->port_str);
  1246. }
  1247. if (port_list == NULL) {
  1248. error("Unable to create Port list from %s",
  1249. node_ptr->port_str);
  1250. error_code = errno;
  1251. goto cleanup;
  1252. }
  1253. #if (SYSTEM_DIMENSIONS > 1)
  1254. if (conf_ptr->node_prefix == NULL)
  1255. _set_node_prefix(node_ptr->nodenames);
  1256. #endif
  1257. /* some sanity checks */
  1258. address_count = hostlist_count(address_list);
  1259. alias_count = hostlist_count(alias_list);
  1260. hostname_count = hostlist_count(hostname_list);
  1261. port_count = hostlist_count(port_list);
  1262. #ifdef HAVE_FRONT_END
  1263. if ((address_count != alias_count) && (address_count != 1)) {
  1264. error("NodeAddr count must equal that of NodeName "
  1265. "records of there must be no more than one");
  1266. goto cleanup;
  1267. }
  1268. if ((hostname_count != alias_count) && (hostname_count != 1)) {
  1269. error("NodeHostname count must equal that of NodeName "
  1270. "records of there must be no more than one");
  1271. goto cleanup;
  1272. }
  1273. #else
  1274. #ifdef MULTIPLE_SLURMD
  1275. if ((address_count != alias_count) && (address_count != 1)) {
  1276. error("NodeAddr count must equal that of NodeName "
  1277. "records of there must be no more than one");
  1278. goto cleanup;
  1279. }
  1280. #else
  1281. if (address_count < alias_count) {
  1282. error("At least as many NodeAddr are required as NodeName");
  1283. goto cleanup;
  1284. }
  1285. if (hostname_count < alias_count) {
  1286. error("At least as many NodeHostname are required "
  1287. "as NodeName");
  1288. goto cleanup;
  1289. }
  1290. #endif /* MULTIPLE_SLURMD */
  1291. #endif /* HAVE_FRONT_END */
  1292. if ((port_count != alias_count) && (port_count > 1)) {
  1293. error("Port count must equal that of NodeName "
  1294. "records or there must be no more than one");
  1295. goto cleanup;
  1296. }
  1297. /* now build the individual node structures */
  1298. while ((alias = hostlist_shift(alias_list))) {
  1299. if (address_count > 0) {
  1300. address_count--;
  1301. if (address)
  1302. free(address);
  1303. address = hostlist_shift(address_list);
  1304. }
  1305. if (hostname_count > 0) {
  1306. hostname_count--;
  1307. if (hostname)
  1308. free(hostname);
  1309. hostname = hostlist_shift(hostname_list);
  1310. }
  1311. if (port_count > 0) {
  1312. port_count--;
  1313. if (port_str)
  1314. free(port_str);
  1315. port_str = hostlist_shift(port_list);
  1316. port_int = atoi(port_str);
  1317. if ((port_int <= 0) || (port_int > 0xffff))
  1318. fatal("Invalid Port %s", node_ptr->port_str);
  1319. port = port_int;
  1320. }
  1321. _push_to_hashtbls(alias, hostname, address, port,
  1322. node_ptr->cpus, node_ptr->boards,
  1323. node_ptr->sockets, node_ptr->cores,
  1324. node_ptr->threads, 0);
  1325. free(alias);
  1326. }
  1327. if (address)
  1328. free(address);
  1329. if (hostname)
  1330. free(hostname);
  1331. if (port_str)
  1332. free(port_str);
  1333. /* free allocated storage */
  1334. cleanup:
  1335. if (address_list)
  1336. hostlist_destroy(address_list);
  1337. if (alias_list)
  1338. hostlist_destroy(alias_list);
  1339. if (hostname_list)
  1340. hostlist_destroy(hostname_list);
  1341. if (port_list)
  1342. hostlist_destroy(port_list);
  1343. return error_code;
  1344. }
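/* Register each front end hostname/address pair from the given record in
 * the lookup hash tables. */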
  1345. static int _register_front_ends(slurm_conf_frontend_t *front_end_ptr)
  1346. {
  1347. hostlist_t hostname_list = NULL;
  1348. hostlist_t address_list = NULL;
  1349. char *hostname = NULL;
  1350. char *address = NULL;
  1351. int error_code = SLURM_SUCCESS;
  1352. if ((front_end_ptr->frontends == NULL) ||
  1353. (front_end_ptr->frontends[0] == '\0'))
  1354. return -1;
  1355. if ((hostname_list = hostlist_create(front_end_ptr->frontends))
  1356. == NULL) {
  1357. error("Unable to create FrontendNames list from %s",
  1358. front_end_ptr->frontends);
  1359. error_code = errno;
  1360. goto cleanup;
  1361. }
  1362. if ((address_list = hostlist_create(front_end_ptr->addresses))
  1363. == NULL) {
  1364. error("Unable to create FrontendAddr list from %s",
  1365. front_end_ptr->addresses);
  1366. error_code = errno;
  1367. goto cleanup;
  1368. }
  1369. if (hostlist_count(address_list) != hostlist_count(hostname_list)) {
  1370. error("Node count mismatch between FrontendNames and "
  1371. "FrontendAddr");
  1372. goto cleanup;
  1373. }
  1374. while ((hostname = hostlist_shift(hostname_list))) {
  1375. address = hostlist_shift(address_list);
  1376. _push_to_hashtbls(hostname, hostname, address,
  1377. front_end_ptr->port, 1, 1, 1, 1, 1, 1);
  1378. free(hostname);
  1379. free(address);
  1380. }
  1381. /* free allocated storage */
  1382. cleanup:
  1383. if (hostname_list)
  1384. hostlist_destroy(hostname_list);
  1385. if (address_list)
  1386. hostlist_destroy(address_list);
  1387. return error_code;
  1388. }
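/* Build the node lookup hash tables from the parsed configuration the
 * first time they are needed. */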
  1389. static void _init_slurmd_nodehash(void)
  1390. {
  1391. slurm_conf_node_t **ptr_array;
  1392. slurm_conf_frontend_t **ptr_front_end;
  1393. int count, i;
  1394. if (nodehash_initialized)
  1395. return;
  1396. else
  1397. nodehash_initialized = true;
  1398. if (!conf_initialized) {
  1399. _init_slurm_conf(NULL);
  1400. conf_initialized = true;
  1401. }
  1402. count = slurm_conf_nodename_array(&ptr_array);
  1403. for (i = 0; i < count; i++)
  1404. _register_conf_node_aliases(ptr_array[i]);
  1405. count = slurm_conf_frontend_array(&ptr_front_end);
  1406. for (i = 0; i < count; i++)
  1407. _register_front_ends(ptr_front_end[i]);
  1408. }
  1409. /*
  1410. * Caller needs to call slurm_conf_lock() and hold the lock before
  1411. * calling this function (and call slurm_conf_unlock() afterwards).
  1412. */
  1413. static char *_internal_get_hostname(const char *node_name)
  1414. {
  1415. int idx;
  1416. names_ll_t *p;
  1417. _init_slurmd_nodehash();
  1418. idx = _get_hash_idx(node_name);
  1419. p = node_to_host_hashtbl[idx];
  1420. while (p) {
  1421. if (strcmp(p->alias, node_name) == 0) {
  1422. return xstrdup(p->hostname);
  1423. }
  1424. p = p->next_alias;
  1425. }
  1426. return NULL;
  1427. }
  1428. /*
  1429. * slurm_conf_get_hostname - Return the NodeHostname for given NodeName
  1430. */
  1431. extern char *slurm_conf_get_hostname(const char *node_name)
  1432. {
  1433. char *hostname = NULL;
  1434. slurm_conf_lock();
  1435. hostname = _internal_get_hostname(node_name);
  1436. slurm_conf_unlock();
  1437. return hostname;
  1438. }
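/* Illustrative usage only; the node name "tux001" is an example and must
 * be defined in slurm.conf:
 *	char *host = slurm_conf_get_hostname("tux001");
 *	if (host) {
 *		info("NodeHostname is %s", host);
 *		xfree(host);
 *	}
 * The returned string is allocated with xstrdup() and must be xfree()'d. */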
  1439. /*
  1440. * slurm_conf_get_nodename - Return the NodeName for given NodeHostname
  1441. *
  1442. * NOTE: Call xfree() to release returned value's memory.
  1443. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1444. */
  1445. extern char *slurm_conf_get_nodename(const char *node_hostname)
  1446. {
  1447. char *alias = NULL;
  1448. int idx;
  1449. names_ll_t *p;
  1450. #ifdef HAVE_FRONT_END
  1451. slurm_conf_frontend_t *front_end_ptr = NULL;
  1452. slurm_conf_lock();
  1453. if (!front_end_list) {
  1454. debug("front_end_list is NULL");
  1455. } else {
  1456. front_end_ptr = list_find_first(front_end_list,
  1457. list_find_frontend,
  1458. (char *) node_hostname);
  1459. if (front_end_ptr) {
  1460. alias = xstrdup(front_end_ptr->frontends);
  1461. slurm_conf_unlock();
  1462. return alias;
  1463. }
  1464. }
  1465. #else
  1466. slurm_conf_lock();
  1467. #endif
  1468. _init_slurmd_nodehash();
  1469. idx = _get_hash_idx(node_hostname);
  1470. p = host_to_node_hashtbl[idx];
  1471. while (p) {
  1472. if (strcmp(p->hostname, node_hostname) == 0) {
  1473. alias = xstrdup(p->alias);
  1474. break;
  1475. }
  1476. p = p->next_hostname;
  1477. }
  1478. slurm_conf_unlock();
  1479. return alias;
  1480. }
  1481. /*
1482. * slurm_conf_get_aliases - Return all NodeName values
1483. * associated with a given NodeHostname (useful with multiple-slurmd
1484. * to get the list of virtual nodes associated with a real node)
  1485. *
  1486. * NOTE: Call xfree() to release returned value's memory.
  1487. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1488. */
  1489. extern char *slurm_conf_get_aliases(const char *node_hostname)
  1490. {
  1491. int idx;
  1492. names_ll_t *p;
  1493. char *aliases = NULL;
  1494. char *s = NULL;
  1495. slurm_conf_lock();
  1496. _init_slurmd_nodehash();
  1497. idx = _get_hash_idx(node_hostname);
  1498. p = host_to_node_hashtbl[idx];
  1499. while (p) {
  1500. if (strcmp(p->hostname, node_hostname) == 0) {
  1501. if ( aliases == NULL )
  1502. aliases = xstrdup(p->alias);
  1503. else {
  1504. s = xstrdup_printf("%s %s",aliases,p->alias);
  1505. xfree(aliases);
  1506. aliases = s;
  1507. }
  1508. }
  1509. p = p->next_hostname;
  1510. }
  1511. slurm_conf_unlock();
  1512. return aliases;
  1513. }
  1514. /*
  1515. * slurm_conf_get_nodeaddr - Return the NodeAddr for given NodeHostname
  1516. *
  1517. * NOTE: Call xfree() to release returned value's memory.
  1518. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1519. */
  1520. extern char *slurm_conf_get_nodeaddr(const char *node_hostname)
  1521. {
  1522. int idx;
  1523. names_ll_t *p;
  1524. slurm_conf_lock();
  1525. _init_slurmd_nodehash();
  1526. idx = _get_hash_idx(node_hostname);
  1527. p = host_to_node_hashtbl[idx];
  1528. while (p) {
  1529. if (strcmp(p->hostname, node_hostname) == 0) {
  1530. char *nodeaddr;
  1531. if (p->address != NULL)
  1532. nodeaddr = xstrdup(p->address);
  1533. else
  1534. nodeaddr = NULL;
  1535. slurm_conf_unlock();
  1536. return nodeaddr;
  1537. }
  1538. p = p->next_hostname;
  1539. }
  1540. slurm_conf_unlock();
  1541. return NULL;
  1542. }
  1543. /*
  1544. * slurm_conf_get_nodename_from_addr - Return the NodeName for given NodeAddr
  1545. *
  1546. * NOTE: Call xfree() to release returned value's memory.
  1547. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1548. */
  1549. extern char *slurm_conf_get_nodename_from_addr(const char *node_addr)
  1550. {
  1551. unsigned char buf[HOSTENT_SIZE];
  1552. struct hostent *hptr;
  1553. unsigned long addr = inet_addr(node_addr);
  1554. char *start_name, *ret_name = NULL, *dot_ptr;
  1555. if (!(hptr = get_host_by_addr((char *)&addr, sizeof(addr), AF_INET,
  1556. buf, sizeof(buf), NULL))) {
  1557. error("No node found with addr %s", node_addr);
  1558. return NULL;
  1559. }
  1560. if (!strcmp(hptr->h_name, "localhost")) {
  1561. start_name = xshort_hostname();
  1562. } else {
  1563. start_name = xstrdup(hptr->h_name);
  1564. dot_ptr = strchr(start_name, '.');
  1565. if (dot_ptr == NULL)
  1566. dot_ptr = start_name + strlen(start_name);
  1567. else
  1568. dot_ptr[0] = '\0';
  1569. }
  1570. ret_name = slurm_conf_get_aliases(start_name);
  1571. xfree(start_name);
  1572. return ret_name;
  1573. }
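/*
 * Illustrative usage sketch (documentation only).  The dotted-quad
 * address below is hypothetical; the reverse lookup uses
 * get_host_by_addr(), the domain is stripped, and the short hostname is
 * then mapped back to the configured NodeName(s).
 *
 *	char *names = slurm_conf_get_nodename_from_addr("10.0.0.17");
 *	if (names) {
 *		info("NodeName(s) at 10.0.0.17: %s", names);
 *		xfree(names);
 *	}
 */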
  1574. /*
  1575. * slurm_conf_get_aliased_nodename - Return the NodeName for the
  1576. * complete hostname string returned by gethostname if there is
  1577. * such a match, otherwise iterate through any aliases returned
  1578. * by get_host_by_name
  1579. */
  1580. extern char *slurm_conf_get_aliased_nodename()
  1581. {
  1582. char hostname_full[1024];
  1583. int error_code;
  1584. char *nodename;
  1585. error_code = gethostname(hostname_full, sizeof(hostname_full));
  1586. /* we shouldn't have any problem here since by the time
  1587. * this function has been called, gethostname_short,
  1588. * which invokes gethostname, has probably already been called
  1589. * successfully, so just return NULL if something weird
  1590. * happens at this point
  1591. */
  1592. if (error_code)
  1593. return NULL;
  1594. nodename = slurm_conf_get_nodename(hostname_full);
  1595. /* if the full hostname did not match a nodename */
  1596. if (nodename == NULL) {
  1597. /* use get_host_by_name; buffer sizes, semantics, etc.
  1598. * copied from slurm_protocol_socket_implementation.c
  1599. */
  1600. struct hostent * he = NULL;
  1601. char * h_buf[4096];
  1602. int h_err;
  1603. he = get_host_by_name(hostname_full, (void *)&h_buf,
  1604. sizeof(h_buf), &h_err);
  1605. if (he != NULL) {
  1606. unsigned int i = 0;
  1607. /* check the "official" host name first */
  1608. nodename = slurm_conf_get_nodename(he->h_name);
  1609. while ((nodename == NULL) &&
  1610. (he->h_aliases[i] != NULL)) {
  1611. /* the "official" name still didn't match --
  1612. * iterate through the aliases */
  1613. nodename =
  1614. slurm_conf_get_nodename(he->h_aliases[i]);
  1615. i++;
  1616. }
  1617. }
  1618. }
  1619. return nodename;
  1620. }
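/*
 * Illustrative sketch (documentation only) of the fallback chain a
 * daemon might use to identify its own NodeName when the short hostname
 * does not appear in slurm.conf.  This shows the general pattern, not
 * any specific caller.
 *
 *	char host[1024], *nodename;
 *	(void) gethostname_short(host, sizeof(host));
 *	if (!(nodename = slurm_conf_get_nodename(host)))
 *		nodename = slurm_conf_get_aliased_nodename();
 *	if (nodename)
 *		xfree(nodename);
 */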
  1621. /*
  1622. * slurm_conf_get_port - Return the port for a given NodeName
  1623. */
  1624. extern uint16_t slurm_conf_get_port(const char *node_name)
  1625. {
  1626. int idx;
  1627. names_ll_t *p;
  1628. slurm_conf_lock();
  1629. _init_slurmd_nodehash();
  1630. idx = _get_hash_idx(node_name);
  1631. p = node_to_host_hashtbl[idx];
  1632. while (p) {
  1633. if (strcmp(p->alias, node_name) == 0) {
  1634. uint16_t port;
  1635. if (!p->port)
  1636. p->port = (uint16_t) conf_ptr->slurmd_port;
  1637. port = p->port;
  1638. slurm_conf_unlock();
  1639. return port;
  1640. }
  1641. p = p->next_alias;
  1642. }
  1643. slurm_conf_unlock();
  1644. return 0;
  1645. }
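/*
 * Illustrative usage sketch (documentation only).  "node001" is a
 * hypothetical NodeName; 0 is returned if the name is not found, and
 * the cluster-wide SlurmdPort is used when no per-node Port was set.
 *
 *	uint16_t port = slurm_conf_get_port("node001");
 *	if (port == 0)
 *		error("node001 not found in slurm.conf");
 */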
  1646. /*
  1647. * slurm_reset_alias - Reset the address and hostname of a specific node name
  1648. */
  1649. extern void slurm_reset_alias(char *node_name, char *node_addr,
  1650. char *node_hostname)
  1651. {
  1652. int idx;
  1653. names_ll_t *p;
  1654. slurm_conf_lock();
  1655. _init_slurmd_nodehash();
  1656. idx = _get_hash_idx(node_name);
  1657. p = node_to_host_hashtbl[idx];
  1658. while (p) {
  1659. if (strcmp(p->alias, node_name) == 0) {
  1660. if (node_addr) {
  1661. xfree(p->address);
  1662. p->address = xstrdup(node_addr);
  1663. p->addr_initialized = false;
  1664. }
  1665. if (node_hostname) {
  1666. xfree(p->hostname);
  1667. p->hostname = xstrdup(node_hostname);
  1668. }
  1669. break;
  1670. }
  1671. p = p->next_alias;
  1672. }
  1673. slurm_conf_unlock();
  1674. return;
  1675. }
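/*
 * Illustrative usage sketch (documentation only).  The node name,
 * address and hostname below are hypothetical; passing NULL for the
 * address or hostname argument leaves that field unchanged.
 *
 *	slurm_reset_alias("node001", "10.0.0.42", "host042");
 */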
  1676. /*
  1677. * slurm_conf_get_addr - Return the slurm_addr_t for a given NodeName
  1678. * Returns SLURM_SUCCESS on success, SLURM_FAILURE on failure.
  1679. */
  1680. extern int slurm_conf_get_addr(const char *node_name, slurm_addr_t *address)
  1681. {
  1682. int idx;
  1683. names_ll_t *p;
  1684. slurm_conf_lock();
  1685. _init_slurmd_nodehash();
  1686. idx = _get_hash_idx(node_name);
  1687. p = node_to_host_hashtbl[idx];
  1688. while (p) {
  1689. if (strcmp(p->alias, node_name) == 0) {
  1690. if (!p->port)
  1691. p->port = (uint16_t) conf_ptr->slurmd_port;
  1692. if (!p->addr_initialized) {
  1693. slurm_set_addr(&p->addr, p->port, p->address);
  1694. if (p->addr.sin_family == 0 &&
  1695. p->addr.sin_port == 0) {
  1696. slurm_conf_unlock();
  1697. return SLURM_FAILURE;
  1698. }
  1699. p->addr_initialized = true;
  1700. }
  1701. *address = p->addr;
  1702. slurm_conf_unlock();
  1703. return SLURM_SUCCESS;
  1704. }
  1705. p = p->next_alias;
  1706. }
  1707. slurm_conf_unlock();
  1708. return SLURM_FAILURE;
  1709. }
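/*
 * Illustrative usage sketch (documentation only).  "node001" is a
 * hypothetical NodeName; the slurm_addr_t is resolved and cached on
 * first use, so later lookups are cheap.
 *
 *	slurm_addr_t addr;
 *	if (slurm_conf_get_addr("node001", &addr) != SLURM_SUCCESS)
 *		error("could not resolve address of node001");
 */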
  1710. /*
  1711. * slurm_conf_get_cpus_bsct -
  1712. * Return the cpus, boards, sockets, cores, and threads configured for a
  1713. * given NodeName
  1714. * Returns SLURM_SUCCESS on success, SLURM_FAILURE on failure.
  1715. */
  1716. extern int slurm_conf_get_cpus_bsct(const char *node_name,
  1717. uint16_t *cpus, uint16_t *boards,
  1718. uint16_t *sockets, uint16_t *cores,
  1719. uint16_t *threads)
  1720. {
  1721. int idx;
  1722. names_ll_t *p;
  1723. slurm_conf_lock();
  1724. _init_slurmd_nodehash();
  1725. idx = _get_hash_idx(node_name);
  1726. p = node_to_host_hashtbl[idx];
  1727. while (p) {
  1728. if (strcmp(p->alias, node_name) == 0) {
  1729. if (cpus)
  1730. *cpus = p->cpus;
  1731. if (boards)
  1732. *boards = p->boards;
  1733. if (sockets)
  1734. *sockets = p->sockets;
  1735. if (cores)
  1736. *cores = p->cores;
  1737. if (threads)
  1738. *threads = p->threads;
  1739. slurm_conf_unlock();
  1740. return SLURM_SUCCESS;
  1741. }
  1742. p = p->next_alias;
  1743. }
  1744. slurm_conf_unlock();
  1745. return SLURM_FAILURE;
  1746. }
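/*
 * Illustrative usage sketch (documentation only).  "node001" is a
 * hypothetical NodeName; any output pointer that is not needed may be
 * passed as NULL.
 *
 *	uint16_t cpus, sockets;
 *	if (slurm_conf_get_cpus_bsct("node001", &cpus, NULL,
 *				     &sockets, NULL, NULL) == SLURM_SUCCESS)
 *		debug("node001: %u CPUs, %u sockets", cpus, sockets);
 */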
  1747. /* gethostname_short - equivalent to gethostname, but return only the first
  1748. * component of the fully qualified name
  1749. * (e.g. "linux123.foo.bar" becomes "linux123")
  1750. * OUT name
  1751. */
  1752. int
  1753. gethostname_short (char *name, size_t len)
  1754. {
  1755. int error_code, name_len;
  1756. char *dot_ptr, path_name[1024];
  1757. error_code = gethostname (path_name, sizeof(path_name));
  1758. if (error_code)
  1759. return error_code;
  1760. dot_ptr = strchr (path_name, '.');
  1761. if (dot_ptr == NULL)
  1762. dot_ptr = path_name + strlen(path_name);
  1763. else
  1764. dot_ptr[0] = '\0';
  1765. name_len = (dot_ptr - path_name);
1766. if (name_len >= len)	/* leave room for the terminating NUL */
  1767. return ENAMETOOLONG;
  1768. strcpy (name, path_name);
  1769. return 0;
  1770. }
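/*
 * Illustrative usage sketch (documentation only): the buffer size below
 * is arbitrary; ENAMETOOLONG is returned when the short name does not
 * fit in the supplied buffer.
 *
 *	char host[64];
 *	if (gethostname_short(host, sizeof(host)) == 0)
 *		debug("running on host %s", host);
 */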
  1771. /*
  1772. * free_slurm_conf - free all storage associated with a slurm_ctl_conf_t.
  1773. * IN/OUT ctl_conf_ptr - pointer to data structure to be freed
  1774. * IN purge_node_hash - purge system-wide node hash table if set,
  1775. * set to zero if clearing private copy of config data
  1776. */
  1777. extern void
  1778. free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash)
  1779. {
  1780. xfree (ctl_conf_ptr->accounting_storage_backup_host);
  1781. xfree (ctl_conf_ptr->accounting_storage_host);
  1782. xfree (ctl_conf_ptr->accounting_storage_loc);
  1783. xfree (ctl_conf_ptr->accounting_storage_pass);
  1784. xfree (ctl_conf_ptr->accounting_storage_type);
  1785. xfree (ctl_conf_ptr->accounting_storage_user);
  1786. xfree (ctl_conf_ptr->authtype);
  1787. xfree (ctl_conf_ptr->backup_addr);
  1788. xfree (ctl_conf_ptr->backup_controller);
  1789. xfree (ctl_conf_ptr->checkpoint_type);
  1790. xfree (ctl_conf_ptr->cluster_name);
  1791. xfree (ctl_conf_ptr->control_addr);
  1792. xfree (ctl_conf_ptr->control_machine);
  1793. xfree (ctl_conf_ptr->crypto_type);
  1794. xfree (ctl_conf_ptr->acct_gather_energy_type);
  1795. xfree (ctl_conf_ptr->epilog);
  1796. xfree (ctl_conf_ptr->epilog_slurmctld);
  1797. xfree (ctl_conf_ptr->gres_plugins);
  1798. xfree (ctl_conf_ptr->health_check_program);
  1799. xfree (ctl_conf_ptr->job_acct_gather_type);
  1800. xfree (ctl_conf_ptr->job_ckpt_dir);
  1801. xfree (ctl_conf_ptr->job_comp_host);
  1802. xfree (ctl_conf_ptr->job_comp_loc);
  1803. xfree (ctl_conf_ptr->job_comp_pass);
  1804. xfree (ctl_conf_ptr->job_comp_type);
  1805. xfree (ctl_conf_ptr->job_comp_user);
  1806. xfree (ctl_conf_ptr->job_credential_private_key);
  1807. xfree (ctl_conf_ptr->job_credential_public_certificate);
  1808. xfree (ctl_conf_ptr->job_submit_plugins);
  1809. xfree (ctl_conf_ptr->launch_type);
  1810. xfree (ctl_conf_ptr->licenses);
  1811. xfree (ctl_conf_ptr->licenses_used);
  1812. xfree (ctl_conf_ptr->mail_prog);
  1813. xfree (ctl_conf_ptr->mpi_default);
  1814. xfree (ctl_conf_ptr->mpi_params);
  1815. xfree (ctl_conf_ptr->node_prefix);
  1816. xfree (ctl_conf_ptr->plugindir);
  1817. xfree (ctl_conf_ptr->plugstack);
  1818. xfree (ctl_conf_ptr->preempt_type);
  1819. xfree (ctl_conf_ptr->priority_type);
  1820. xfree (ctl_conf_ptr->proctrack_type);
  1821. xfree (ctl_conf_ptr->prolog);
  1822. xfree (ctl_conf_ptr->prolog_slurmctld);
  1823. xfree (ctl_conf_ptr->propagate_rlimits);
  1824. xfree (ctl_conf_ptr->propagate_rlimits_except);
  1825. xfree (ctl_conf_ptr->reboot_program);
  1826. xfree (ctl_conf_ptr->resume_program);
  1827. xfree (ctl_conf_ptr->salloc_default_command);
  1828. xfree (ctl_conf_ptr->sched_logfile);
  1829. xfree (ctl_conf_ptr->sched_params);
  1830. xfree (ctl_conf_ptr->schedtype);
  1831. xfree (ctl_conf_ptr->select_type);
  1832. if (ctl_conf_ptr->select_conf_key_pairs)
  1833. list_destroy((List)ctl_conf_ptr->select_conf_key_pairs);
  1834. xfree (ctl_conf_ptr->slurm_conf);
  1835. xfree (ctl_conf_ptr->slurm_user_name);
  1836. xfree (ctl_conf_ptr->slurmctld_logfile);
  1837. xfree (ctl_conf_ptr->slurmctld_pidfile);
  1838. xfree (ctl_conf_ptr->slurmd_logfile);
  1839. xfree (ctl_conf_ptr->slurmd_pidfile);
  1840. xfree (ctl_conf_ptr->slurmd_spooldir);
  1841. xfree (ctl_conf_ptr->slurmd_user_name);
  1842. xfree (ctl_conf_ptr->srun_epilog);
  1843. xfree (ctl_conf_ptr->srun_prolog);
  1844. xfree (ctl_conf_ptr->state_save_location);
  1845. xfree (ctl_conf_ptr->suspend_exc_nodes);
  1846. xfree (ctl_conf_ptr->suspend_exc_parts);
  1847. xfree (ctl_conf_ptr->suspend_program);
  1848. xfree (ctl_conf_ptr->switch_type);
  1849. xfree (ctl_conf_ptr->task_epilog);
  1850. xfree (ctl_conf_ptr->task_plugin);
  1851. xfree (ctl_conf_ptr->task_prolog);
  1852. xfree (ctl_conf_ptr->tmp_fs);
  1853. xfree (ctl_conf_ptr->topology_plugin);
  1854. xfree (ctl_conf_ptr->unkillable_program);
  1855. xfree (ctl_conf_ptr->version);
  1856. xfree (ctl_conf_ptr->z_char);
  1857. if (purge_node_hash)
  1858. _free_name_hashtbl();
  1859. }
  1860. /*
  1861. * init_slurm_conf - initialize or re-initialize the slurm configuration
  1862. * values to defaults (NULL or NO_VAL). Note that the configuration
  1863. * file pathname (slurm_conf) is not changed.
  1864. * IN/OUT ctl_conf_ptr - pointer to data structure to be initialized
  1865. */
  1866. void
  1867. init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
  1868. {
  1869. ctl_conf_ptr->last_update = time(NULL);
  1870. xfree (ctl_conf_ptr->accounting_storage_backup_host);
  1871. ctl_conf_ptr->accounting_storage_enforce = 0;
  1872. xfree (ctl_conf_ptr->accounting_storage_host);
  1873. xfree (ctl_conf_ptr->accounting_storage_loc);
  1874. xfree (ctl_conf_ptr->accounting_storage_pass);
  1875. ctl_conf_ptr->accounting_storage_port = 0;
  1876. xfree (ctl_conf_ptr->accounting_storage_type);
  1877. xfree (ctl_conf_ptr->accounting_storage_user);
  1878. xfree (ctl_conf_ptr->authtype);
  1879. xfree (ctl_conf_ptr->backup_addr);
  1880. xfree (ctl_conf_ptr->backup_controller);
  1881. ctl_conf_ptr->batch_start_timeout = 0;
  1882. xfree (ctl_conf_ptr->checkpoint_type);
  1883. xfree (ctl_conf_ptr->cluster_name);
  1884. ctl_conf_ptr->complete_wait = (uint16_t) NO_VAL;
  1885. xfree (ctl_conf_ptr->control_addr);
  1886. xfree (ctl_conf_ptr->control_machine);
  1887. xfree (ctl_conf_ptr->crypto_type);
  1888. ctl_conf_ptr->def_mem_per_cpu = 0;
  1889. ctl_conf_ptr->debug_flags = 0;
  1890. ctl_conf_ptr->disable_root_jobs = 0;
  1891. ctl_conf_ptr->acct_gather_node_freq = 0;
  1892. xfree (ctl_conf_ptr->acct_gather_energy_type);
  1893. ctl_conf_ptr->enforce_part_limits = 0;
  1894. xfree (ctl_conf_ptr->epilog);
  1895. ctl_conf_ptr->epilog_msg_time = (uint32_t) NO_VAL;
  1896. ctl_conf_ptr->fast_schedule = (uint16_t) NO_VAL;
  1897. ctl_conf_ptr->first_job_id = NO_VAL;
  1898. ctl_conf_ptr->get_env_timeout = 0;
  1899. xfree(ctl_conf_ptr->gres_plugins);
  1900. ctl_conf_ptr->group_info = (uint16_t) NO_VAL;
  1901. ctl_conf_ptr->hash_val = (uint32_t) NO_VAL;
  1902. ctl_conf_ptr->health_check_interval = 0;
  1903. xfree(ctl_conf_ptr->health_check_program);
  1904. ctl_conf_ptr->inactive_limit = (uint16_t) NO_VAL;
  1905. xfree (ctl_conf_ptr->job_acct_gather_type);
  1906. ctl_conf_ptr->job_acct_gather_freq = 0;
  1907. xfree (ctl_conf_ptr->job_ckpt_dir);
  1908. xfree (ctl_conf_ptr->job_comp_loc);
  1909. xfree (ctl_conf_ptr->job_comp_pass);
  1910. ctl_conf_ptr->job_comp_port = 0;
  1911. xfree (ctl_conf_ptr->job_comp_type);
  1912. xfree (ctl_conf_ptr->job_comp_user);
  1913. xfree (ctl_conf_ptr->job_credential_private_key);
  1914. xfree (ctl_conf_ptr->job_credential_public_certificate);
  1915. ctl_conf_ptr->job_file_append = (uint16_t) NO_VAL;
  1916. ctl_conf_ptr->job_requeue = (uint16_t) NO_VAL;
  1917. xfree(ctl_conf_ptr->job_submit_plugins);
  1918. ctl_conf_ptr->kill_wait = (uint16_t) NO_VAL;
  1919. xfree (ctl_conf_ptr->launch_type);
  1920. xfree (ctl_conf_ptr->licenses);
  1921. xfree (ctl_conf_ptr->mail_prog);
  1922. ctl_conf_ptr->max_job_cnt = (uint32_t) NO_VAL;
  1923. ctl_conf_ptr->max_job_id = NO_VAL;
  1924. ctl_conf_ptr->max_mem_per_cpu = 0;
  1925. ctl_conf_ptr->max_step_cnt = (uint32_t) NO_VAL;
  1926. ctl_conf_ptr->min_job_age = (uint16_t) NO_VAL;
  1927. xfree (ctl_conf_ptr->mpi_default);
  1928. xfree (ctl_conf_ptr->mpi_params);
  1929. ctl_conf_ptr->msg_timeout = (uint16_t) NO_VAL;
  1930. ctl_conf_ptr->next_job_id = (uint32_t) NO_VAL;
  1931. xfree (ctl_conf_ptr->node_prefix);
  1932. ctl_conf_ptr->over_time_limit = 0;
  1933. xfree (ctl_conf_ptr->plugindir);
  1934. xfree (ctl_conf_ptr->plugstack);
  1935. ctl_conf_ptr->preempt_mode = 0;
  1936. xfree (ctl_conf_ptr->preempt_type);
  1937. ctl_conf_ptr->private_data = 0;
  1938. xfree (ctl_conf_ptr->proctrack_type);
  1939. xfree (ctl_conf_ptr->prolog);
  1940. ctl_conf_ptr->propagate_prio_process = (uint16_t) NO_VAL;
  1941. xfree (ctl_conf_ptr->propagate_rlimits);
  1942. xfree (ctl_conf_ptr->propagate_rlimits_except);
  1943. xfree (ctl_conf_ptr->reboot_program);
  1944. ctl_conf_ptr->reconfig_flags = 0;
  1945. ctl_conf_ptr->resume_timeout = 0;
  1946. xfree (ctl_conf_ptr->resume_program);
  1947. ctl_conf_ptr->resume_rate = (uint16_t) NO_VAL;
  1948. ctl_conf_ptr->resv_over_run = 0;
  1949. ctl_conf_ptr->ret2service = (uint16_t) NO_VAL;
  1950. xfree( ctl_conf_ptr->salloc_default_command);
  1951. xfree( ctl_conf_ptr->sched_params );
  1952. ctl_conf_ptr->sched_time_slice = (uint16_t) NO_VAL;
  1953. xfree( ctl_conf_ptr->schedtype );
  1954. ctl_conf_ptr->schedport = (uint16_t) NO_VAL;
  1955. ctl_conf_ptr->schedrootfltr = (uint16_t) NO_VAL;
  1956. xfree( ctl_conf_ptr->select_type );
  1957. ctl_conf_ptr->select_type_param = (uint16_t) NO_VAL;
  1958. ctl_conf_ptr->slurm_user_id = (uint16_t) NO_VAL;
  1959. xfree (ctl_conf_ptr->slurm_user_name);
  1960. ctl_conf_ptr->slurmd_user_id = (uint16_t) NO_VAL;
  1961. xfree (ctl_conf_ptr->slurmd_user_name);
  1962. ctl_conf_ptr->slurmctld_debug = (uint16_t) NO_VAL;
  1963. xfree (ctl_conf_ptr->slurmctld_logfile);
  1964. xfree (ctl_conf_ptr->sched_logfile);
  1965. ctl_conf_ptr->sched_log_level = (uint16_t) NO_VAL;
  1966. xfree (ctl_conf_ptr->slurmctld_pidfile);
  1967. ctl_conf_ptr->slurmctld_port = (uint32_t) NO_VAL;
  1968. ctl_conf_ptr->slurmctld_port_count = 1;
  1969. ctl_conf_ptr->slurmctld_timeout = (uint16_t) NO_VAL;
  1970. ctl_conf_ptr->slurmd_debug = (uint16_t) NO_VAL;
  1971. xfree (ctl_conf_ptr->slurmd_logfile);
  1972. xfree (ctl_conf_ptr->slurmd_pidfile);
  1973. ctl_conf_ptr->slurmd_port = (uint32_t) NO_VAL;
  1974. xfree (ctl_conf_ptr->slurmd_spooldir);
  1975. ctl_conf_ptr->slurmd_timeout = (uint16_t) NO_VAL;
  1976. xfree (ctl_conf_ptr->srun_prolog);
  1977. xfree (ctl_conf_ptr->srun_epilog);
  1978. xfree (ctl_conf_ptr->state_save_location);
  1979. xfree (ctl_conf_ptr->suspend_exc_nodes);
  1980. xfree (ctl_conf_ptr->suspend_exc_parts);
  1981. xfree (ctl_conf_ptr->suspend_program);
  1982. ctl_conf_ptr->suspend_rate = (uint16_t) NO_VAL;
  1983. ctl_conf_ptr->suspend_time = (uint16_t) NO_VAL;
  1984. ctl_conf_ptr->suspend_timeout = 0;
  1985. xfree (ctl_conf_ptr->switch_type);
  1986. xfree (ctl_conf_ptr->task_epilog);
  1987. xfree (ctl_conf_ptr->task_plugin);
  1988. ctl_conf_ptr->task_plugin_param = 0;
  1989. xfree (ctl_conf_ptr->task_prolog);
  1990. xfree (ctl_conf_ptr->tmp_fs);
  1991. xfree (ctl_conf_ptr->topology_plugin);
  1992. ctl_conf_ptr->tree_width = (uint16_t) NO_VAL;
  1993. xfree (ctl_conf_ptr->unkillable_program);
  1994. ctl_conf_ptr->unkillable_timeout = (uint16_t) NO_VAL;
  1995. ctl_conf_ptr->use_pam = 0;
  1996. ctl_conf_ptr->vsize_factor = 0;
  1997. ctl_conf_ptr->wait_time = (uint16_t) NO_VAL;
  1998. ctl_conf_ptr->kill_on_bad_exit = 0;
  1999. _free_name_hashtbl();
  2000. _init_name_hashtbl();
  2001. return;
  2002. }
  2003. /* handle config name in form (example) slurmdbd:cluster0:10.0.0.254:6819
  2004. *
  2005. * NOTE: Changes are required in the accounting_storage/slurmdbd plugin in
  2006. * order for this to work as desired. Andriy Grytsenko (Massive Solutions
  2007. * Limited) has a private accounting_storage plugin with this functionality */
  2008. static int _config_is_storage(s_p_hashtbl_t *hashtbl, char *name)
  2009. {
  2010. char *cluster, *host, *port;
  2011. void *db_conn;
  2012. config_key_pair_t *pair;
  2013. List config;
  2014. ListIterator iter;
  2015. int rc = -1;
  2016. cluster = strchr(name, ':');
  2017. if (cluster == NULL)
  2018. return (-1);
  2019. host = strchr(&cluster[1], ':');
  2020. if (host == NULL)
  2021. return (-1);
  2022. port = strrchr(&host[1], ':');
  2023. if (port == NULL)
  2024. return (-1);
  2025. conf_ptr->accounting_storage_type = xstrdup_printf("accounting_storage/%.*s",
  2026. (int)(cluster - name), name);
  2027. cluster++;
  2028. cluster = xstrndup(cluster, host - cluster);
  2029. host++;
  2030. conf_ptr->accounting_storage_host = xstrndup(host, port - host);
  2031. port++;
  2032. debug3("trying retrieve config via %s from host %s on port %s",
  2033. conf_ptr->accounting_storage_type,
  2034. conf_ptr->accounting_storage_host, port);
  2035. conf_ptr->accounting_storage_port = atoi(port);
  2036. conf_ptr->plugindir = xstrdup(default_plugin_path);
  2037. /* unlock conf_lock and set as initialized before accessing it */
  2038. conf_initialized = true;
  2039. pthread_mutex_unlock(&conf_lock);
  2040. db_conn = acct_storage_g_get_connection(NULL, 0, false, NULL);
  2041. if (db_conn == NULL)
2042. goto end; /* plugin will log the error itself */
  2043. config = acct_storage_g_get_config(db_conn, "slurm.conf");
  2044. acct_storage_g_close_connection(&db_conn); /* ignore error code */
  2045. if (config == NULL) {
  2046. error("cannot retrieve config from storage");
  2047. goto end;
  2048. }
  2049. iter = list_iterator_create(config);
  2050. while ((pair = list_next(iter)) != NULL)
  2051. s_p_parse_pair(hashtbl, pair->name, pair->value);
  2052. list_iterator_destroy(iter);
  2053. list_destroy(config);
  2054. rc = 0; /* done */
  2055. end:
  2056. /* restore status quo now */
  2057. pthread_mutex_lock(&conf_lock);
  2058. conf_initialized = false;
  2059. xfree(cluster);
  2060. xfree(conf_ptr->accounting_storage_type);
  2061. xfree(conf_ptr->accounting_storage_host);
  2062. xfree(conf_ptr->plugindir);
  2063. conf_ptr->accounting_storage_type = NULL;
  2064. conf_ptr->accounting_storage_host = NULL;
  2065. conf_ptr->plugindir = NULL;
  2066. return (rc);
  2067. }
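/*
 * Illustrative sketch of the expected name format (documentation only).
 * With a hypothetical SLURM_CONF value of
 *	slurmdbd:cluster0:10.0.0.254:6819
 * the fields are split at the colons as follows:
 *	storage type -> accounting_storage/slurmdbd
 *	cluster      -> cluster0
 *	host         -> 10.0.0.254
 *	port         -> 6819
 * and the configuration key/value pairs are then fetched from that
 * storage backend instead of being parsed from a local file.
 */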
  2068. /* caller must lock conf_lock */
  2069. static void _init_slurm_conf(const char *file_name)
  2070. {
  2071. char *name = (char *)file_name;
  2072. /* conf_ptr = (slurm_ctl_conf_t *)xmalloc(sizeof(slurm_ctl_conf_t)); */
  2073. if (name == NULL) {
  2074. name = getenv("SLURM_CONF");
  2075. if (name == NULL)
  2076. name = default_slurm_config_file;
  2077. }
  2078. if (conf_initialized)
  2079. error("the conf_hashtbl is already inited");
  2080. debug("Reading slurm.conf file: %s", name);
  2081. conf_hashtbl = s_p_hashtbl_create(slurm_conf_options);
  2082. conf_ptr->last_update = time(NULL);
  2083. /* init hash to 0 */
  2084. conf_ptr->hash_val = 0;
  2085. if ((_config_is_storage(conf_hashtbl, name) < 0) &&
  2086. (s_p_parse_file(conf_hashtbl, &conf_ptr->hash_val, name, false)
  2087. == SLURM_ERROR)) {
  2088. fatal("something wrong with opening/reading conf file");
  2089. }
  2090. /* s_p_dump_values(conf_hashtbl, slurm_conf_options); */
  2091. _validate_and_set_defaults(conf_ptr, conf_hashtbl);
  2092. conf_ptr->slurm_conf = xstrdup(name);
  2093. }
  2094. /* caller must lock conf_lock */
  2095. static void
  2096. _destroy_slurm_conf(void)
  2097. {
  2098. s_p_hashtbl_destroy(conf_hashtbl);
  2099. if (default_frontend_tbl != NULL) {
  2100. s_p_hashtbl_destroy(default_frontend_tbl);
  2101. default_frontend_tbl = NULL;
  2102. }
  2103. if (default_nodename_tbl != NULL) {
  2104. s_p_hashtbl_destroy(default_nodename_tbl);
  2105. default_nodename_tbl = NULL;
  2106. }
  2107. if (default_partition_tbl != NULL) {
  2108. s_p_hashtbl_destroy(default_partition_tbl);
  2109. default_partition_tbl = NULL;
  2110. }
  2111. free_slurm_conf(conf_ptr, true);
  2112. conf_initialized = false;
  2113. /* xfree(conf_ptr); */
  2114. }
  2115. /*
2116. * slurm_conf_init - load the slurm configuration from a file.
  2117. * IN file_name - name of the slurm configuration file to be read
  2118. * If file_name is NULL, then this routine tries to use
  2119. * the value in the SLURM_CONF env variable. Failing that,
  2120. * it uses the compiled-in default file name.
  2121. * If the conf structures have already been initialized by a call to
  2122. * slurm_conf_init, any subsequent calls will do nothing until
  2123. * slurm_conf_destroy is called.
  2124. * RET SLURM_SUCCESS if conf file is initialized. If the slurm conf
2125. * was already initialized, return SLURM_ERROR.
  2126. */
  2127. extern int
  2128. slurm_conf_init(const char *file_name)
  2129. {
  2130. pthread_mutex_lock(&conf_lock);
  2131. if (conf_initialized) {
  2132. pthread_mutex_unlock(&conf_lock);
  2133. return SLURM_ERROR;
  2134. }
  2135. init_slurm_conf(conf_ptr);
  2136. _init_slurm_conf(file_name);
  2137. conf_initialized = true;
  2138. pthread_mutex_unlock(&conf_lock);
  2139. return SLURM_SUCCESS;
  2140. }
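/*
 * Illustrative lifecycle sketch (documentation only): a typical caller
 * initializes the configuration once (NULL means use SLURM_CONF or the
 * compiled-in default path), reads it under slurm_conf_lock(), and
 * tears it down on exit.  The field read below is only an example.
 *
 *	slurm_conf_init(NULL);
 *	slurm_ctl_conf_t *conf = slurm_conf_lock();
 *	debug("ControlMachine = %s", conf->control_machine);
 *	slurm_conf_unlock();
 *	slurm_conf_destroy();
 */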
  2141. static int _internal_reinit(const char *file_name)
  2142. {
  2143. char *name = (char *)file_name;
  2144. if (name == NULL) {
  2145. name = getenv("SLURM_CONF");
  2146. if (name == NULL)
  2147. name = default_slurm_config_file;
  2148. }
  2149. if (conf_initialized) {
  2150. /* could check modified time on slurm.conf here */
  2151. _destroy_slurm_conf();
  2152. }
  2153. _init_slurm_conf(name);
  2154. conf_initialized = true;
  2155. return SLURM_SUCCESS;
  2156. }
  2157. /*
  2158. * slurm_conf_reinit - reload the slurm configuration from a file.
  2159. * IN file_name - name of the slurm configuration file to be read
  2160. * If file_name is NULL, then this routine tries to use
  2161. * the value in the SLURM_CONF env variable. Failing that,
  2162. * it uses the compiled-in default file name.
  2163. * Unlike slurm_conf_init, slurm_conf_reinit will always reread the
  2164. * file and reinitialize the configuration structures.
  2165. * RET SLURM_SUCCESS if conf file is reinitialized, otherwise SLURM_ERROR.
  2166. */
  2167. extern int
  2168. slurm_conf_reinit(const char *file_name)
  2169. {
  2170. int rc;
  2171. pthread_mutex_lock(&conf_lock);
  2172. rc = _internal_reinit(file_name);
  2173. pthread_mutex_unlock(&conf_lock);
  2174. return rc;
  2175. }
  2176. extern void
  2177. slurm_conf_mutex_init(void)
  2178. {
  2179. pthread_mutex_init(&conf_lock, NULL);
  2180. }
  2181. extern void
  2182. slurm_conf_install_fork_handlers(void)
  2183. {
  2184. int err;
  2185. if ((err = pthread_atfork(NULL, NULL, &slurm_conf_mutex_init)))
  2186. fatal("can't install slurm_conf atfork handler");
  2187. return;
  2188. }
  2189. extern int
  2190. slurm_conf_destroy(void)
  2191. {
  2192. pthread_mutex_lock(&conf_lock);
  2193. if (!conf_initialized) {
  2194. pthread_mutex_unlock(&conf_lock);
  2195. return SLURM_SUCCESS;
  2196. }
  2197. _destroy_slurm_conf();
  2198. pthread_mutex_unlock(&conf_lock);
  2199. return SLURM_SUCCESS;
  2200. }
  2201. extern slurm_ctl_conf_t *
  2202. slurm_conf_lock(void)
  2203. {
  2204. pthread_mutex_lock(&conf_lock);
  2205. if (!conf_initialized) {
  2206. _init_slurm_conf(NULL);
  2207. conf_initialized = true;
  2208. }
  2209. return conf_ptr;
  2210. }
  2211. extern void
  2212. slurm_conf_unlock(void)
  2213. {
  2214. pthread_mutex_unlock(&conf_lock);
  2215. }
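/*
 * Illustrative usage sketch (documentation only): values that must stay
 * consistent should be copied out while the lock is held, since the
 * strings referenced by the returned structure are freed if the
 * configuration is destroyed or reread.
 *
 *	slurm_ctl_conf_t *conf = slurm_conf_lock();
 *	uint16_t slurmd_timeout = conf->slurmd_timeout;
 *	char *auth = xstrdup(conf->authtype);
 *	slurm_conf_unlock();
 *	xfree(auth);
 */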
  2216. /* Normalize supplied debug level to be in range per log.h definitions */
  2217. static void _normalize_debug_level(uint16_t *level)
  2218. {
  2219. if (*level > LOG_LEVEL_END) {
  2220. error("Normalizing debug level from %u to %d",
  2221. *level, (LOG_LEVEL_END - 1));
  2222. *level = (LOG_LEVEL_END - 1);
  2223. }
  2224. /* level is uint16, always > LOG_LEVEL_QUIET(0), can't underflow */
  2225. }
  2226. /*
2227. * _validate_and_set_defaults - validate configuration values and set defaults
  2228. * IN/OUT ctl_conf_ptr - a configuration as loaded by read_slurm_conf_ctl
  2229. *
2230. * NOTE: a backup_controller or control_machine of "localhost" is over-written
  2231. * with this machine's name.
  2232. * NOTE: if backup_addr is NULL, it is over-written by backup_controller
  2233. * NOTE: if control_addr is NULL, it is over-written by control_machine
  2234. */
  2235. static void
  2236. _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
  2237. {
  2238. char *temp_str = NULL;
  2239. long long_suspend_time;
  2240. bool truth;
  2241. char *default_storage_type = NULL, *default_storage_host = NULL;
  2242. char *default_storage_user = NULL, *default_storage_pass = NULL;
  2243. char *default_storage_loc = NULL;
  2244. uint32_t default_storage_port = 0;
  2245. uint16_t uint16_tmp;
  2246. if (s_p_get_string(&conf->backup_controller, "BackupController",
  2247. hashtbl)
  2248. && strcasecmp("localhost", conf->backup_controller) == 0) {
  2249. xfree(conf->backup_controller);
  2250. conf->backup_controller = xmalloc (MAX_SLURM_NAME);
  2251. if (gethostname_short(conf->backup_controller, MAX_SLURM_NAME))
  2252. fatal("getnodename: %m");
  2253. }
  2254. if (s_p_get_string(&conf->backup_addr, "BackupAddr", hashtbl)) {
  2255. if (conf->backup_controller == NULL) {
  2256. error("BackupAddr specified without BackupController");
  2257. xfree(conf->backup_addr);
  2258. }
  2259. } else {
  2260. if (conf->backup_controller != NULL)
  2261. conf->backup_addr = xstrdup(conf->backup_controller);
  2262. }
  2263. if (!s_p_get_uint16(&conf->batch_start_timeout, "BatchStartTimeout",
  2264. hashtbl))
  2265. conf->batch_start_timeout = DEFAULT_BATCH_START_TIMEOUT;
  2266. s_p_get_string(&conf->cluster_name, "ClusterName", hashtbl);
  2267. /* Some databases are case sensitive so we have to make sure
  2268. * the cluster name is lower case since sacctmgr makes sure
  2269. * this is the case as well.
  2270. */
  2271. if (conf->cluster_name) {
  2272. int i;
  2273. for (i = 0; conf->cluster_name[i] != '\0'; i++)
  2274. conf->cluster_name[i] =
  2275. (char)tolower((int)conf->cluster_name[i]);
  2276. }
  2277. if (!s_p_get_uint16(&conf->complete_wait, "CompleteWait", hashtbl))
  2278. conf->complete_wait = DEFAULT_COMPLETE_WAIT;
  2279. if (!s_p_get_string(&conf->control_machine, "ControlMachine", hashtbl))
  2280. fatal ("ControlMachine not specified.");
  2281. else if (strcasecmp("localhost", conf->control_machine) == 0) {
  2282. xfree (conf->control_machine);
  2283. conf->control_machine = xmalloc(MAX_SLURM_NAME);
  2284. if (gethostname_short(conf->control_machine, MAX_SLURM_NAME))
  2285. fatal("getnodename: %m");
  2286. }
  2287. if (!s_p_get_string(&conf->control_addr, "ControlAddr", hashtbl) &&
  2288. (conf->control_machine != NULL)) {
  2289. if (strchr(conf->control_machine, ',')) {
  2290. fatal("ControlMachine has multiple host names so "
  2291. "ControlAddr must be specified");
  2292. }
  2293. conf->control_addr = xstrdup (conf->control_machine);
  2294. }
  2295. if ((conf->backup_controller != NULL) &&
  2296. (strcmp(conf->backup_controller, conf->control_machine) == 0)) {
  2297. error("ControlMachine and BackupController identical");
  2298. xfree(conf->backup_addr);
  2299. xfree(conf->backup_controller);
  2300. }
  2301. if (!s_p_get_string(&conf->acct_gather_energy_type,
  2302. "AcctGatherEnergyType", hashtbl))
  2303. conf->acct_gather_energy_type =
  2304. xstrdup(DEFAULT_ACCT_GATHER_ENERGY_TYPE);
  2305. if (!s_p_get_uint16(&conf->acct_gather_node_freq,
  2306. "AcctGatherNodeFreq", hashtbl))
  2307. conf->acct_gather_node_freq = 0;
  2308. s_p_get_string(&default_storage_type, "DefaultStorageType", hashtbl);
  2309. s_p_get_string(&default_storage_host, "DefaultStorageHost", hashtbl);
  2310. s_p_get_string(&default_storage_user, "DefaultStorageUser", hashtbl);
  2311. s_p_get_string(&default_storage_pass, "DefaultStoragePass", hashtbl);
  2312. s_p_get_string(&default_storage_loc, "DefaultStorageLoc", hashtbl);
  2313. s_p_get_uint32(&default_storage_port, "DefaultStoragePort", hashtbl);
  2314. s_p_get_string(&conf->job_credential_private_key,
  2315. "JobCredentialPrivateKey", hashtbl);
  2316. s_p_get_string(&conf->job_credential_public_certificate,
  2317. "JobCredentialPublicCertificate", hashtbl);
  2318. if (s_p_get_uint32(&conf->max_job_cnt, "MaxJobCount", hashtbl) &&
  2319. (conf->max_job_cnt < 1))
  2320. fatal("MaxJobCount=%u, No jobs permitted", conf->max_job_cnt);
  2321. if (s_p_get_uint32(&conf->max_step_cnt, "MaxStepCount", hashtbl) &&
  2322. (conf->max_step_cnt < 1)) {
  2323. fatal("MaxStepCount=%u, No steps permitted",
  2324. conf->max_step_cnt);
  2325. }
  2326. if (!s_p_get_string(&conf->authtype, "AuthType", hashtbl))
  2327. conf->authtype = xstrdup(DEFAULT_AUTH_TYPE);
  2328. if (s_p_get_uint16(&uint16_tmp, "GroupUpdateTime", hashtbl)) {
  2329. if (uint16_tmp > GROUP_TIME_MASK) {
  2330. fatal("GroupUpdateTime exceeds limit of %u",
  2331. GROUP_TIME_MASK);
  2332. }
  2333. conf->group_info = uint16_tmp;
  2334. } else
  2335. conf->group_info = DEFAULT_GROUP_INFO;
  2336. if (s_p_get_uint16(&uint16_tmp, "CacheGroups", hashtbl) && uint16_tmp)
  2337. conf->group_info |= GROUP_CACHE;
  2338. if (s_p_get_uint16(&uint16_tmp, "GroupUpdateForce", hashtbl) &&
  2339. uint16_tmp)
  2340. conf->group_info |= GROUP_FORCE;
  2341. if (!s_p_get_string(&conf->checkpoint_type, "CheckpointType", hashtbl))
  2342. conf->checkpoint_type = xstrdup(DEFAULT_CHECKPOINT_TYPE);
  2343. if (!s_p_get_string(&conf->crypto_type, "CryptoType", hashtbl))
  2344. conf->crypto_type = xstrdup(DEFAULT_CRYPTO_TYPE);
  2345. if ((strcmp(conf->crypto_type, "crypto/openssl") == 0) &&
  2346. ((conf->job_credential_private_key == NULL) ||
  2347. (conf->job_credential_public_certificate == NULL))) {
  2348. fatal("CryptoType=crypto/openssl requires that both "
  2349. "JobCredentialPrivateKey and "
  2350. "JobCredentialPublicCertificate be set");
  2351. }
  2352. if (s_p_get_uint32(&conf->def_mem_per_cpu, "DefMemPerCPU", hashtbl))
  2353. conf->def_mem_per_cpu |= MEM_PER_CPU;
  2354. else if (!s_p_get_uint32(&conf->def_mem_per_cpu, "DefMemPerNode",
  2355. hashtbl))
  2356. conf->def_mem_per_cpu = DEFAULT_MEM_PER_CPU;
  2357. if (s_p_get_string(&temp_str, "DebugFlags", hashtbl)) {
  2358. conf->debug_flags = debug_str2flags(temp_str);
  2359. if (conf->debug_flags == NO_VAL)
  2360. fatal("DebugFlags invalid: %s", temp_str);
  2361. xfree(temp_str);
  2362. } else /* Default: no DebugFlags */
  2363. conf->debug_flags = 0;
  2364. if (!s_p_get_boolean((bool *) &conf->disable_root_jobs,
  2365. "DisableRootJobs", hashtbl))
  2366. conf->disable_root_jobs = DEFAULT_DISABLE_ROOT_JOBS;
  2367. if (!s_p_get_boolean((bool *) &conf->enforce_part_limits,
  2368. "EnforcePartLimits", hashtbl))
  2369. conf->enforce_part_limits = DEFAULT_ENFORCE_PART_LIMITS;
  2370. s_p_get_string(&conf->epilog, "Epilog", hashtbl);
  2371. if (!s_p_get_uint32(&conf->epilog_msg_time, "EpilogMsgTime", hashtbl))
  2372. conf->epilog_msg_time = DEFAULT_EPILOG_MSG_TIME;
  2373. s_p_get_string(&conf->epilog_slurmctld, "EpilogSlurmctld", hashtbl);
  2374. if (!s_p_get_uint16(&conf->fast_schedule, "FastSchedule", hashtbl))
  2375. conf->fast_schedule = DEFAULT_FAST_SCHEDULE;
  2376. if (!s_p_get_uint32(&conf->first_job_id, "FirstJobId", hashtbl))
  2377. conf->first_job_id = DEFAULT_FIRST_JOB_ID;
  2378. if (!s_p_get_uint32(&conf->max_job_id, "MaxJobId", hashtbl))
  2379. conf->max_job_id = DEFAULT_MAX_JOB_ID;
  2380. s_p_get_string(&conf->gres_plugins, "GresTypes", hashtbl);
  2381. if (s_p_get_uint16(&conf->inactive_limit, "InactiveLimit", hashtbl)) {
  2382. #ifdef HAVE_BG_L_P
  2383. /* Inactive limit must be zero on BlueGene L/P */
  2384. if (conf->inactive_limit) {
  2385. error("InactiveLimit=%d is invalid on BlueGene L/P",
  2386. conf->inactive_limit);
  2387. }
  2388. conf->inactive_limit = 0;
  2389. #endif
  2390. } else {
  2391. #ifdef HAVE_BG_L_P
  2392. conf->inactive_limit = 0;
  2393. #endif
  2394. conf->inactive_limit = DEFAULT_INACTIVE_LIMIT;
  2395. }
  2396. if (!s_p_get_uint16(&conf->job_acct_gather_freq,
  2397. "JobAcctGatherFrequency", hashtbl))
  2398. conf->job_acct_gather_freq = DEFAULT_JOB_ACCT_GATHER_FREQ;
  2399. if(!s_p_get_string(&conf->job_acct_gather_type,
  2400. "JobAcctGatherType", hashtbl))
  2401. conf->job_acct_gather_type =
  2402. xstrdup(DEFAULT_JOB_ACCT_GATHER_TYPE);
  2403. if (!s_p_get_string(&conf->job_ckpt_dir, "JobCheckpointDir", hashtbl))
  2404. conf->job_ckpt_dir = xstrdup(DEFAULT_JOB_CKPT_DIR);
  2405. if (!s_p_get_string(&conf->job_comp_type, "JobCompType", hashtbl)) {
  2406. if(default_storage_type) {
  2407. if(!strcasecmp("slurmdbd", default_storage_type)) {
  2408. error("Can not use the default storage type "
  2409. "specified for jobcomp since there is "
  2410. "not slurmdbd type. We are using %s "
  2411. "as the type. To disable this message "
  2412. "set JobCompType in your slurm.conf",
  2413. DEFAULT_JOB_COMP_TYPE);
  2414. conf->job_comp_type =
  2415. xstrdup(DEFAULT_JOB_COMP_TYPE);
  2416. } else
  2417. conf->job_comp_type =
  2418. xstrdup_printf("jobcomp/%s",
  2419. default_storage_type);
  2420. } else
  2421. conf->job_comp_type = xstrdup(DEFAULT_JOB_COMP_TYPE);
  2422. }
  2423. if (!s_p_get_string(&conf->job_comp_loc, "JobCompLoc", hashtbl)) {
  2424. if(default_storage_loc)
  2425. conf->job_comp_loc = xstrdup(default_storage_loc);
  2426. else if(!strcmp(conf->job_comp_type, "job_comp/mysql")
  2427. || !strcmp(conf->job_comp_type, "job_comp/pgsql"))
  2428. conf->job_comp_loc = xstrdup(DEFAULT_JOB_COMP_DB);
  2429. else
  2430. conf->job_comp_loc = xstrdup(DEFAULT_JOB_COMP_LOC);
  2431. }
  2432. if (!s_p_get_string(&conf->job_comp_host, "JobCompHost",
  2433. hashtbl)) {
  2434. if(default_storage_host)
  2435. conf->job_comp_host = xstrdup(default_storage_host);
  2436. else
  2437. conf->job_comp_host = xstrdup(DEFAULT_STORAGE_HOST);
  2438. }
  2439. if (!s_p_get_string(&conf->job_comp_user, "JobCompUser",
  2440. hashtbl)) {
  2441. if(default_storage_user)
  2442. conf->job_comp_user = xstrdup(default_storage_user);
  2443. else
  2444. conf->job_comp_user = xstrdup(DEFAULT_STORAGE_USER);
  2445. }
  2446. if (!s_p_get_string(&conf->job_comp_pass, "JobCompPass",
  2447. hashtbl)) {
  2448. if(default_storage_pass)
  2449. conf->job_comp_pass = xstrdup(default_storage_pass);
  2450. }
  2451. if (!s_p_get_uint32(&conf->job_comp_port, "JobCompPort",
  2452. hashtbl)) {
  2453. if(default_storage_port)
  2454. conf->job_comp_port = default_storage_port;
  2455. else if(!strcmp(conf->job_comp_type, "job_comp/mysql"))
  2456. conf->job_comp_port = DEFAULT_MYSQL_PORT;
  2457. else if(!strcmp(conf->job_comp_type, "job_comp/pgsql"))
  2458. conf->job_comp_port = DEFAULT_PGSQL_PORT;
  2459. else
  2460. conf->job_comp_port = DEFAULT_STORAGE_PORT;
  2461. }
  2462. if (!s_p_get_uint16(&conf->job_file_append, "JobFileAppend", hashtbl))
  2463. conf->job_file_append = 0;
  2464. if (!s_p_get_uint16(&conf->job_requeue, "JobRequeue", hashtbl))
  2465. conf->job_requeue = 1;
  2466. else if (conf->job_requeue > 1)
  2467. conf->job_requeue = 1;
  2468. s_p_get_string(&conf->job_submit_plugins, "JobSubmitPlugins",
  2469. hashtbl);
  2470. if (!s_p_get_uint16(&conf->get_env_timeout, "GetEnvTimeout", hashtbl))
  2471. conf->get_env_timeout = DEFAULT_GET_ENV_TIMEOUT;
  2472. s_p_get_uint16(&conf->health_check_interval, "HealthCheckInterval",
  2473. hashtbl);
  2474. s_p_get_string(&conf->health_check_program, "HealthCheckProgram",
  2475. hashtbl);
  2476. if (!s_p_get_uint16(&conf->kill_on_bad_exit, "KillOnBadExit", hashtbl))
  2477. conf->kill_on_bad_exit = DEFAULT_KILL_ON_BAD_EXIT;
  2478. if (!s_p_get_uint16(&conf->kill_wait, "KillWait", hashtbl))
  2479. conf->kill_wait = DEFAULT_KILL_WAIT;
  2480. if (!s_p_get_string(&conf->launch_type, "LaunchType", hashtbl))
  2481. conf->launch_type = xstrdup(DEFAULT_LAUNCH_TYPE);
  2482. s_p_get_string(&conf->licenses, "Licenses", hashtbl);
  2483. if (!s_p_get_string(&conf->mail_prog, "MailProg", hashtbl))
  2484. conf->mail_prog = xstrdup(DEFAULT_MAIL_PROG);
  2485. if (!s_p_get_uint32(&conf->max_job_cnt, "MaxJobCount", hashtbl))
  2486. conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT;
  2487. if (!s_p_get_uint32(&conf->max_job_id, "MaxJobId", hashtbl))
  2488. conf->max_job_id = DEFAULT_MAX_JOB_ID;
  2489. if (s_p_get_uint32(&conf->max_mem_per_cpu,
  2490. "MaxMemPerCPU", hashtbl)) {
  2491. conf->max_mem_per_cpu |= MEM_PER_CPU;
  2492. } else if (!s_p_get_uint32(&conf->max_mem_per_cpu,
  2493. "MaxMemPerNode", hashtbl)) {
  2494. conf->max_mem_per_cpu = DEFAULT_MAX_MEM_PER_CPU;
  2495. }
  2496. if (!s_p_get_uint32(&conf->max_step_cnt, "MaxStepCount", hashtbl))
  2497. conf->max_step_cnt = DEFAULT_MAX_STEP_COUNT;
  2498. if (!s_p_get_uint16(&conf->max_tasks_per_node, "MaxTasksPerNode",
  2499. hashtbl)) {
  2500. conf->max_tasks_per_node = DEFAULT_MAX_TASKS_PER_NODE;
  2501. }
  2502. if (!s_p_get_uint16(&conf->msg_timeout, "MessageTimeout", hashtbl))
  2503. conf->msg_timeout = DEFAULT_MSG_TIMEOUT;
  2504. else if (conf->msg_timeout > 100) {
  2505. info("WARNING: MessageTimeout is too high for effective "
  2506. "fault-tolerance");
  2507. }
  2508. if (!s_p_get_uint16(&conf->min_job_age, "MinJobAge", hashtbl))
  2509. conf->min_job_age = DEFAULT_MIN_JOB_AGE;
  2510. else if (conf->min_job_age < 2) {
  2511. info("WARNING: MinJobAge must be at least 2");
  2512. conf->min_job_age = 2;
  2513. }
  2514. if (!s_p_get_string(&conf->mpi_default, "MpiDefault", hashtbl))
  2515. conf->mpi_default = xstrdup(DEFAULT_MPI_DEFAULT);
  2516. s_p_get_string(&conf->mpi_params, "MpiParams", hashtbl);
  2517. if(!s_p_get_boolean((bool *)&conf->track_wckey,
  2518. "TrackWCKey", hashtbl))
  2519. conf->track_wckey = false;
  2520. if (!s_p_get_string(&conf->accounting_storage_type,
  2521. "AccountingStorageType", hashtbl)) {
  2522. if(default_storage_type)
  2523. conf->accounting_storage_type =
  2524. xstrdup_printf("accounting_storage/%s",
  2525. default_storage_type);
  2526. else
  2527. conf->accounting_storage_type =
  2528. xstrdup(DEFAULT_ACCOUNTING_STORAGE_TYPE);
  2529. }
  2530. if (s_p_get_string(&temp_str, "AccountingStorageEnforce", hashtbl)) {
  2531. if (strstr(temp_str, "1") || strstr(temp_str, "associations"))
  2532. conf->accounting_storage_enforce
  2533. |= ACCOUNTING_ENFORCE_ASSOCS;
  2534. if (strstr(temp_str, "2") || strstr(temp_str, "limits")) {
  2535. conf->accounting_storage_enforce
  2536. |= ACCOUNTING_ENFORCE_ASSOCS;
  2537. conf->accounting_storage_enforce
  2538. |= ACCOUNTING_ENFORCE_LIMITS;
  2539. }
  2540. if (strstr(temp_str, "safe")) {
  2541. conf->accounting_storage_enforce
  2542. |= ACCOUNTING_ENFORCE_ASSOCS;
  2543. conf->accounting_storage_enforce
  2544. |= ACCOUNTING_ENFORCE_LIMITS;
  2545. conf->accounting_storage_enforce
  2546. |= ACCOUNTING_ENFORCE_SAFE;
  2547. }
  2548. if (strstr(temp_str, "wckeys")) {
  2549. conf->accounting_storage_enforce
  2550. |= ACCOUNTING_ENFORCE_ASSOCS;
  2551. conf->accounting_storage_enforce
  2552. |= ACCOUNTING_ENFORCE_WCKEYS;
  2553. conf->track_wckey = true;
  2554. }
  2555. if (strstr(temp_str, "qos")) {
  2556. conf->accounting_storage_enforce
  2557. |= ACCOUNTING_ENFORCE_ASSOCS;
  2558. conf->accounting_storage_enforce
  2559. |= ACCOUNTING_ENFORCE_QOS;
  2560. }
  2561. if (strstr(temp_str, "all")) {
  2562. conf->accounting_storage_enforce = 0xffff;
  2563. conf->track_wckey = true;
  2564. }
  2565. xfree(temp_str);
  2566. } else
  2567. conf->accounting_storage_enforce = 0;
  2568. /* if no backup we don't care */
  2569. s_p_get_string(&conf->accounting_storage_backup_host,
  2570. "AccountingStorageBackupHost", hashtbl);
  2571. if (!s_p_get_string(&conf->accounting_storage_host,
  2572. "AccountingStorageHost", hashtbl)) {
  2573. if(default_storage_host)
  2574. conf->accounting_storage_host =
  2575. xstrdup(default_storage_host);
  2576. else
  2577. conf->accounting_storage_host =
  2578. xstrdup(DEFAULT_STORAGE_HOST);
  2579. }
  2580. if (!s_p_get_string(&conf->accounting_storage_loc,
  2581. "AccountingStorageLoc", hashtbl)) {
  2582. if (default_storage_loc)
  2583. conf->accounting_storage_loc =
  2584. xstrdup(default_storage_loc);
  2585. else if (!strcmp(conf->accounting_storage_type,
  2586. "accounting_storage/mysql") ||
  2587. !strcmp(conf->accounting_storage_type,
  2588. "accounting_storage/pgsql"))
  2589. conf->accounting_storage_loc =
  2590. xstrdup(DEFAULT_ACCOUNTING_DB);
  2591. else
  2592. conf->accounting_storage_loc =
  2593. xstrdup(DEFAULT_STORAGE_LOC);
  2594. }
  2595. if (!s_p_get_string(&conf->accounting_storage_user,
  2596. "AccountingStorageUser", hashtbl)) {
  2597. if(default_storage_user)
  2598. conf->accounting_storage_user =
  2599. xstrdup(default_storage_user);
  2600. else
  2601. conf->accounting_storage_user =
  2602. xstrdup(DEFAULT_STORAGE_USER);
  2603. }
  2604. if (!s_p_get_string(&conf->accounting_storage_pass,
  2605. "AccountingStoragePass", hashtbl)) {
  2606. if(default_storage_pass)
  2607. conf->accounting_storage_pass =
  2608. xstrdup(default_storage_pass);
  2609. }
  2610. if (s_p_get_boolean(&truth, "AccountingStoreJobComment", hashtbl)
  2611. && !truth)
  2612. conf->acctng_store_job_comment = 0;
  2613. else
  2614. conf->acctng_store_job_comment = 1;
  2615. if (!s_p_get_uint32(&conf->accounting_storage_port,
  2616. "AccountingStoragePort", hashtbl)) {
  2617. if(default_storage_port)
  2618. conf->accounting_storage_port = default_storage_port;
  2619. else if(!strcmp(conf->accounting_storage_type,
  2620. "accounting_storage/slurmdbd"))
  2621. conf->accounting_storage_port = SLURMDBD_PORT;
  2622. else if(!strcmp(conf->accounting_storage_type,
  2623. "accounting_storage/mysql"))
  2624. conf->accounting_storage_port = DEFAULT_MYSQL_PORT;
  2625. else if(!strcmp(conf->accounting_storage_type,
  2626. "accounting_storage/pgsql"))
  2627. conf->accounting_storage_port = DEFAULT_PGSQL_PORT;
  2628. else
  2629. conf->accounting_storage_port = DEFAULT_STORAGE_PORT;
  2630. }
  2631. /* remove the user and loc if using slurmdbd */
  2632. if(!strcmp(conf->accounting_storage_type,
  2633. "accounting_storage/slurmdbd")) {
  2634. xfree(conf->accounting_storage_loc);
  2635. conf->accounting_storage_loc = xstrdup("N/A");
  2636. xfree(conf->accounting_storage_user);
  2637. conf->accounting_storage_user = xstrdup("N/A");
  2638. }
  2639. s_p_get_uint16(&conf->over_time_limit, "OverTimeLimit", hashtbl);
  2640. if (!s_p_get_string(&conf->plugindir, "PluginDir", hashtbl))
  2641. conf->plugindir = xstrdup(default_plugin_path);
  2642. if (!_is_valid_path(conf->plugindir, "PluginDir"))
  2643. fatal("Bad value \"%s\" for PluginDir", conf->plugindir);
  2644. if (!s_p_get_string(&conf->plugstack, "PlugStackConfig", hashtbl))
  2645. conf->plugstack = xstrdup(default_plugstack);
  2646. if (s_p_get_string(&temp_str, "PreemptMode", hashtbl)) {
  2647. conf->preempt_mode = preempt_mode_num(temp_str);
  2648. if (conf->preempt_mode == (uint16_t) NO_VAL)
  2649. fatal("PreemptMode=%s invalid", temp_str);
  2650. if (conf->preempt_mode == PREEMPT_MODE_SUSPEND)
  2651. fatal("PreemptMode=SUSPEND requires GANG too");
  2652. xfree(temp_str);
  2653. } else {
  2654. conf->preempt_mode = PREEMPT_MODE_OFF;
  2655. }
  2656. if (!s_p_get_string(&conf->preempt_type, "PreemptType", hashtbl))
  2657. conf->preempt_type = xstrdup(DEFAULT_PREEMPT_TYPE);
  2658. if (strcmp(conf->preempt_type, "preempt/qos") == 0) {
  2659. int preempt_mode = conf->preempt_mode & (~PREEMPT_MODE_GANG);
  2660. if ((preempt_mode == PREEMPT_MODE_OFF) ||
  2661. (preempt_mode == PREEMPT_MODE_SUSPEND)) {
  2662. fatal("PreemptType and PreemptMode values "
  2663. "incompatible");
  2664. }
  2665. } else if (strcmp(conf->preempt_type, "preempt/partition_prio") == 0) {
  2666. int preempt_mode = conf->preempt_mode & (~PREEMPT_MODE_GANG);
  2667. if (preempt_mode == PREEMPT_MODE_OFF) {
  2668. fatal("PreemptType and PreemptMode values "
  2669. "incompatible");
  2670. }
  2671. } else if (strcmp(conf->preempt_type, "preempt/none") == 0) {
  2672. int preempt_mode = conf->preempt_mode & (~PREEMPT_MODE_GANG);
  2673. if (preempt_mode != PREEMPT_MODE_OFF) {
  2674. fatal("PreemptType and PreemptMode values "
  2675. "incompatible");
  2676. }
  2677. }
  2678. #ifdef HAVE_BG
  2679. if ((conf->preempt_mode & PREEMPT_MODE_GANG) ||
  2680. (conf->preempt_mode & PREEMPT_MODE_SUSPEND))
  2681. fatal("PreemptMode incompatible with BlueGene systems");
  2682. #endif
  2683. if (s_p_get_string(&temp_str, "PriorityDecayHalfLife", hashtbl)) {
  2684. int max_time = time_str2mins(temp_str);
  2685. if ((max_time < 0) && (max_time != INFINITE)) {
  2686. fatal("Bad value \"%s\" for PriorityDecayHalfLife",
  2687. temp_str);
  2688. }
  2689. conf->priority_decay_hl = max_time * 60;
  2690. xfree(temp_str);
  2691. } else
  2692. conf->priority_decay_hl = DEFAULT_PRIORITY_DECAY;
  2693. if (s_p_get_string(&temp_str, "PriorityCalcPeriod", hashtbl)) {
  2694. int calc_period = time_str2mins(temp_str);
  2695. if (calc_period < 1) {
  2696. fatal("Bad value \"%s\" for PriorityCalcPeriod",
  2697. temp_str);
  2698. }
  2699. conf->priority_calc_period = calc_period * 60;
  2700. xfree(temp_str);
  2701. } else
  2702. conf->priority_calc_period = DEFAULT_PRIORITY_CALC_PERIOD;
  2703. if (s_p_get_boolean(&truth, "PriorityFavorSmall", hashtbl) && truth)
  2704. conf->priority_favor_small = 1;
  2705. else
  2706. conf->priority_favor_small = 0;
  2707. conf->priority_flags = 0;
  2708. if (s_p_get_string(&temp_str, "PriorityFlags", hashtbl)) {
  2709. if (strstr(temp_str, "ACCRUE_ALWAYS"))
  2710. conf->priority_flags |= PRIORITY_FLAGS_ACCRUE_ALWAYS;
  2711. }
  2712. if (s_p_get_string(&temp_str, "PriorityMaxAge", hashtbl)) {
  2713. int max_time = time_str2mins(temp_str);
  2714. if ((max_time < 0) && (max_time != INFINITE)) {
  2715. fatal("Bad value \"%s\" for PriorityMaxAge",
  2716. temp_str);
  2717. }
  2718. conf->priority_max_age = max_time * 60;
  2719. xfree(temp_str);
  2720. } else
  2721. conf->priority_max_age = DEFAULT_PRIORITY_DECAY;
  2722. if (s_p_get_string(&temp_str, "PriorityUsageResetPeriod", hashtbl)) {
  2723. if (strcasecmp(temp_str, "none") == 0)
  2724. conf->priority_reset_period = PRIORITY_RESET_NONE;
  2725. else if (strcasecmp(temp_str, "now") == 0)
  2726. conf->priority_reset_period = PRIORITY_RESET_NOW;
  2727. else if (strcasecmp(temp_str, "daily") == 0)
  2728. conf->priority_reset_period = PRIORITY_RESET_DAILY;
  2729. else if (strcasecmp(temp_str, "weekly") == 0)
  2730. conf->priority_reset_period = PRIORITY_RESET_WEEKLY;
  2731. else if (strcasecmp(temp_str, "monthly") == 0)
  2732. conf->priority_reset_period = PRIORITY_RESET_MONTHLY;
  2733. else if (strcasecmp(temp_str, "quarterly") == 0)
  2734. conf->priority_reset_period = PRIORITY_RESET_QUARTERLY;
  2735. else if (strcasecmp(temp_str, "yearly") == 0)
  2736. conf->priority_reset_period = PRIORITY_RESET_YEARLY;
  2737. else {
  2738. fatal("Bad value \"%s\" for PriorityUsageResetPeriod",
  2739. temp_str);
  2740. }
  2741. xfree(temp_str);
  2742. } else {
  2743. conf->priority_reset_period = PRIORITY_RESET_NONE;
  2744. if(!conf->priority_decay_hl) {
  2745. fatal("You have to either have "
  2746. "PriorityDecayHalfLife != 0 or "
  2747. "PriorityUsageResetPeriod set to something "
  2748. "or the priority plugin will result in "
  2749. "rolling over.");
  2750. }
  2751. }
  2752. if (!s_p_get_string(&conf->priority_type, "PriorityType", hashtbl))
  2753. conf->priority_type = xstrdup(DEFAULT_PRIORITY_TYPE);
  2754. if (!s_p_get_uint32(&conf->priority_weight_age,
  2755. "PriorityWeightAge", hashtbl))
  2756. conf->priority_weight_age = 0;
  2757. if (!s_p_get_uint32(&conf->priority_weight_fs,
  2758. "PriorityWeightFairshare", hashtbl))
  2759. conf->priority_weight_fs = 0;
  2760. if (!s_p_get_uint32(&conf->priority_weight_js,
  2761. "PriorityWeightJobSize", hashtbl))
  2762. conf->priority_weight_js = 0;
  2763. if (!s_p_get_uint32(&conf->priority_weight_part,
  2764. "PriorityWeightPartition", hashtbl))
  2765. conf->priority_weight_part = 0;
  2766. if (!s_p_get_uint32(&conf->priority_weight_qos,
  2767. "PriorityWeightQOS", hashtbl))
  2768. conf->priority_weight_qos = 0;
  2769. /* Out of order due to use with ProctrackType */
  2770. if (!s_p_get_string(&conf->switch_type, "SwitchType", hashtbl))
  2771. conf->switch_type = xstrdup(DEFAULT_SWITCH_TYPE);
  2772. if (!s_p_get_string(&conf->proctrack_type, "ProctrackType", hashtbl)) {
  2773. if (!strcmp(conf->switch_type,"switch/elan"))
  2774. conf->proctrack_type = xstrdup("proctrack/rms");
  2775. else
  2776. conf->proctrack_type =
  2777. xstrdup(DEFAULT_PROCTRACK_TYPE);
  2778. }
  2779. #ifdef HAVE_REAL_CRAY
  2780. if (strcmp(conf->proctrack_type, "proctrack/sgi_job"))
  2781. fatal("On Cray ProctrackType=proctrack/sgi_job is required to "
  2782. "ensure collision-free tracking of ALPS reservations");
  2783. #endif
  2784. if ((!strcmp(conf->switch_type, "switch/elan"))
  2785. && (!strcmp(conf->proctrack_type,"proctrack/linuxproc")))
  2786. fatal("proctrack/linuxproc is incompatible with switch/elan");
  2787. conf->private_data = 0; /* Set to default before parsing PrivateData */
  2788. if (s_p_get_string(&temp_str, "PrivateData", hashtbl)) {
  2789. if (strstr(temp_str, "account"))
  2790. conf->private_data |= PRIVATE_DATA_ACCOUNTS;
  2791. if (strstr(temp_str, "job"))
  2792. conf->private_data |= PRIVATE_DATA_JOBS;
  2793. if (strstr(temp_str, "node"))
  2794. conf->private_data |= PRIVATE_DATA_NODES;
  2795. if (strstr(temp_str, "partition"))
  2796. conf->private_data |= PRIVATE_DATA_PARTITIONS;
  2797. if (strstr(temp_str, "reservation"))
  2798. conf->private_data |= PRIVATE_DATA_RESERVATIONS;
  2799. if (strstr(temp_str, "usage"))
  2800. conf->private_data |= PRIVATE_DATA_USAGE;
  2801. if (strstr(temp_str, "user"))
  2802. conf->private_data |= PRIVATE_DATA_USERS;
  2803. if (strstr(temp_str, "all"))
  2804. conf->private_data = 0xffff;
  2805. xfree(temp_str);
  2806. }
  2807. s_p_get_string(&conf->prolog, "Prolog", hashtbl);
  2808. s_p_get_string(&conf->prolog_slurmctld, "PrologSlurmctld", hashtbl);
  2809. if (!s_p_get_uint16(&conf->propagate_prio_process,
  2810. "PropagatePrioProcess", hashtbl)) {
  2811. conf->propagate_prio_process = PROP_PRIO_OFF;
  2812. } else if (conf->propagate_prio_process > PROP_PRIO_NICER) {
  2813. fatal("Bad PropagatePrioProcess: %u",
  2814. conf->propagate_prio_process);
  2815. }
  2816. if (s_p_get_string(&conf->propagate_rlimits_except,
  2817. "PropagateResourceLimitsExcept", hashtbl)) {
  2818. if ((parse_rlimits(conf->propagate_rlimits_except,
  2819. NO_PROPAGATE_RLIMITS)) < 0)
  2820. fatal("Bad PropagateResourceLimitsExcept: %s",
  2821. conf->propagate_rlimits_except);
  2822. } else {
  2823. if (!s_p_get_string(&conf->propagate_rlimits,
  2824. "PropagateResourceLimits", hashtbl))
  2825. conf->propagate_rlimits = xstrdup( "ALL" );
  2826. if ((parse_rlimits(conf->propagate_rlimits,
  2827. PROPAGATE_RLIMITS )) < 0)
  2828. fatal("Bad PropagateResourceLimits: %s",
  2829. conf->propagate_rlimits);
  2830. }
  2831. if (s_p_get_string(&temp_str, "ReconfigFlags", hashtbl)) {
  2832. conf->reconfig_flags = reconfig_str2flags(temp_str);
  2833. if (conf->reconfig_flags == 0xffff)
  2834. fatal("ReconfigFlags invalid: %s", temp_str);
  2835. xfree(temp_str);
  2836. } else /* Default: no ReconfigFlags */
  2837. conf->reconfig_flags = 0;
  2838. if (!s_p_get_uint16(&conf->ret2service, "ReturnToService", hashtbl))
  2839. conf->ret2service = DEFAULT_RETURN_TO_SERVICE;
  2840. #ifdef HAVE_CRAY
  2841. if (conf->ret2service > 1)
  2842. fatal("ReturnToService > 1 is not supported on Cray");
  2843. #endif
  2844. s_p_get_uint16(&conf->resv_over_run, "ResvOverRun", hashtbl);
  2845. s_p_get_string(&conf->resume_program, "ResumeProgram", hashtbl);
  2846. if (!s_p_get_uint16(&conf->resume_rate, "ResumeRate", hashtbl))
  2847. conf->resume_rate = DEFAULT_RESUME_RATE;
  2848. if (!s_p_get_uint16(&conf->resume_timeout, "ResumeTimeout", hashtbl))
  2849. conf->resume_timeout = DEFAULT_RESUME_TIMEOUT;
  2850. s_p_get_string(&conf->reboot_program, "RebootProgram", hashtbl);
  2851. s_p_get_string(&conf->salloc_default_command, "SallocDefaultCommand",
  2852. hashtbl);
  2853. s_p_get_string(&conf->sched_params, "SchedulerParameters", hashtbl);
  2854. if (s_p_get_uint16(&conf->schedport, "SchedulerPort", hashtbl)) {
  2855. if (conf->schedport == 0) {
  2856. error("SchedulerPort=0 is invalid");
  2857. conf->schedport = DEFAULT_SCHEDULER_PORT;
  2858. }
  2859. } else {
  2860. conf->schedport = DEFAULT_SCHEDULER_PORT;
  2861. }
  2862. if (!s_p_get_uint16(&conf->schedrootfltr,
  2863. "SchedulerRootFilter", hashtbl))
  2864. conf->schedrootfltr = DEFAULT_SCHEDROOTFILTER;
  2865. if (!s_p_get_uint16(&conf->sched_time_slice, "SchedulerTimeSlice",
  2866. hashtbl))
  2867. conf->sched_time_slice = DEFAULT_SCHED_TIME_SLICE;
  2868. else if (conf->sched_time_slice < 5) {
  2869. error("SchedulerTimeSlice must be at least 5 seconds");
  2870. conf->sched_time_slice = DEFAULT_SCHED_TIME_SLICE;
  2871. }
  2872. if (!s_p_get_string(&conf->schedtype, "SchedulerType", hashtbl))
  2873. conf->schedtype = xstrdup(DEFAULT_SCHEDTYPE);
  2874. if (strcmp(conf->priority_type, "priority/multifactor") == 0) {
  2875. if ((strcmp(conf->schedtype, "sched/wiki") == 0) ||
  2876. (strcmp(conf->schedtype, "sched/wiki2") == 0)) {
  2877. fatal("PriorityType=priority/multifactor is "
  2878. "incompatible with SchedulerType=%s",
  2879. conf->schedtype);
  2880. }
  2881. }
  2882. if (conf->preempt_mode) {
  2883. if ((strcmp(conf->schedtype, "sched/wiki") == 0) ||
  2884. (strcmp(conf->schedtype, "sched/wiki2") == 0)) {
  2885. fatal("Job preemption is incompatible with "
  2886. "SchedulerType=%s",
  2887. conf->schedtype);
  2888. }
  2889. }
  2890. if (!s_p_get_string(&conf->select_type, "SelectType", hashtbl))
  2891. conf->select_type = xstrdup(DEFAULT_SELECT_TYPE);
  2892. if (s_p_get_string(&temp_str,
  2893. "SelectTypeParameters", hashtbl)) {
  2894. uint16_t type_param;
  2895. if ((parse_select_type_param(temp_str, &type_param) < 0)) {
  2896. fatal("Bad SelectTypeParameter: %s", temp_str);
  2897. xfree(temp_str);
  2898. }
  2899. conf->select_type_param = type_param;
  2900. xfree(temp_str);
  2901. } else {
  2902. if (strcmp(conf->select_type,"select/cons_res") == 0)
  2903. conf->select_type_param = CR_CPU;
  2904. else
  2905. conf->select_type_param = 0;
  2906. }
  2907. if (!s_p_get_string( &conf->slurm_user_name, "SlurmUser", hashtbl)) {
  2908. conf->slurm_user_name = xstrdup("root");
  2909. conf->slurm_user_id = 0;
  2910. } else {
  2911. uid_t my_uid;
  2912. if (uid_from_string (conf->slurm_user_name, &my_uid) < 0) {
  2913. fatal ("Invalid user for SlurmUser %s, ignored",
  2914. conf->slurm_user_name);
  2915. xfree(conf->slurm_user_name);
  2916. } else {
  2917. conf->slurm_user_id = my_uid;
  2918. }
  2919. }
  2920. #ifdef HAVE_REAL_CRAY
  2921. /*
  2922. * This requirement derives from Cray ALPS:
  2923. * - ALPS reservations can only be created by the job owner or root
  2924. * (confirmation may be done by other non-privileged users);
  2925. * - freeing a reservation always requires root privileges.
  2926. */
  2927. if (conf->slurm_user_id != 0)
  2928. fatal("Cray requires SlurmUser=root (default), but have '%s'.",
  2929. conf->slurm_user_name);
  2930. #endif
  2931. if (!s_p_get_string( &conf->slurmd_user_name, "SlurmdUser", hashtbl)) {
  2932. conf->slurmd_user_name = xstrdup("root");
  2933. conf->slurmd_user_id = 0;
  2934. } else {
  2935. uid_t my_uid;
  2936. if (uid_from_string (conf->slurmd_user_name, &my_uid) < 0) {
  2937. fatal ("Invalid user for SlurmdUser %s, ignored",
  2938. conf->slurmd_user_name);
  2939. xfree(conf->slurmd_user_name);
  2940. } else {
  2941. conf->slurmd_user_id = my_uid;
  2942. }
  2943. }
  2944. if (s_p_get_string(&temp_str, "SlurmctldDebug", hashtbl)) {
  2945. conf->slurmctld_debug = log_string2num(temp_str);
  2946. if (conf->slurmctld_debug == (uint16_t) NO_VAL)
  2947. fatal("Invalid SlurmctldDebug %s", temp_str);
  2948. xfree(temp_str);
  2949. _normalize_debug_level(&conf->slurmctld_debug);
  2950. } else
  2951. conf->slurmctld_debug = LOG_LEVEL_INFO;
  2952. if (!s_p_get_string(&conf->slurmctld_pidfile,
  2953. "SlurmctldPidFile", hashtbl))
  2954. conf->slurmctld_pidfile = xstrdup(DEFAULT_SLURMCTLD_PIDFILE);
  2955. s_p_get_string(&conf->slurmctld_logfile, "SlurmctldLogFile", hashtbl);
  2956. if (s_p_get_string(&temp_str, "SlurmctldPort", hashtbl)) {
  2957. char *end_ptr = NULL;
  2958. long port_long;
  2959. slurm_seterrno(0);
  2960. port_long = strtol(temp_str, &end_ptr, 10);
  2961. if ((port_long == LONG_MIN) || (port_long == LONG_MAX) ||
  2962. (port_long <= 0) || errno) {
  2963. fatal("Invalid SlurmctldPort %s", temp_str);
  2964. }
  2965. conf->slurmctld_port = port_long;
  2966. if (end_ptr[0] == '-') {
  2967. port_long = strtol(end_ptr+1, NULL, 10);
  2968. if ((port_long == LONG_MIN) ||
  2969. (port_long == LONG_MAX) ||
  2970. (port_long <= conf->slurmctld_port) || errno) {
  2971. fatal("Invalid SlurmctldPort %s", temp_str);
  2972. }
  2973. conf->slurmctld_port_count = port_long + 1 -
  2974. conf->slurmctld_port;
  2975. } else if (end_ptr[0] != '\0') {
  2976. fatal("Invalid SlurmctldPort %s", temp_str);
  2977. } else {
  2978. conf->slurmctld_port_count = 1;
  2979. }
  2980. xfree(temp_str);
  2981. } else {
  2982. conf->slurmctld_port = SLURMCTLD_PORT;
  2983. conf->slurmctld_port_count = SLURMCTLD_PORT_COUNT;
  2984. }
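	/*
	 * Illustrative examples of the SlurmctldPort parsing above; the
	 * numeric values are hypothetical, the defaults are the build-time
	 * SLURMCTLD_PORT and SLURMCTLD_PORT_COUNT macros:
	 *
	 *   SlurmctldPort=6817       -> slurmctld_port=6817, port_count=1
	 *   SlurmctldPort=6817-6820  -> slurmctld_port=6817, port_count=4
	 *   SlurmctldPort=6820-6817  -> fatal(): range must be increasing
	 *   (keyword absent)         -> SLURMCTLD_PORT, SLURMCTLD_PORT_COUNT
	 */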
  2985. if (!s_p_get_uint16(&conf->slurmctld_timeout,
  2986. "SlurmctldTimeout", hashtbl))
  2987. conf->slurmctld_timeout = DEFAULT_SLURMCTLD_TIMEOUT;
  2988. if (s_p_get_string(&temp_str, "SlurmdDebug", hashtbl)) {
  2989. conf->slurmd_debug = log_string2num(temp_str);
  2990. if (conf->slurmd_debug == (uint16_t) NO_VAL)
  2991. fatal("Invalid SlurmdDebug %s", temp_str);
  2992. xfree(temp_str);
  2993. _normalize_debug_level(&conf->slurmd_debug);
  2994. } else
  2995. conf->slurmd_debug = LOG_LEVEL_INFO;
  2996. s_p_get_string(&conf->slurmd_logfile, "SlurmdLogFile", hashtbl);
  2997. if (!s_p_get_string(&conf->slurmd_pidfile, "SlurmdPidFile", hashtbl))
  2998. conf->slurmd_pidfile = xstrdup(DEFAULT_SLURMD_PIDFILE);
  2999. if (!s_p_get_uint32(&conf->slurmd_port, "SlurmdPort", hashtbl))
  3000. conf->slurmd_port = SLURMD_PORT;
  3001. s_p_get_string(&conf->sched_logfile, "SlurmSchedLogFile", hashtbl);
  3002. if (!s_p_get_uint16(&conf->sched_log_level,
  3003. "SlurmSchedLogLevel", hashtbl))
  3004. conf->sched_log_level = DEFAULT_SCHED_LOG_LEVEL;
  3005. if (!s_p_get_string(&conf->slurmd_spooldir, "SlurmdSpoolDir", hashtbl))
  3006. conf->slurmd_spooldir = xstrdup(DEFAULT_SPOOLDIR);
  3007. if (!s_p_get_uint16(&conf->slurmd_timeout, "SlurmdTimeout", hashtbl))
  3008. conf->slurmd_timeout = DEFAULT_SLURMD_TIMEOUT;
  3009. s_p_get_string(&conf->srun_prolog, "SrunProlog", hashtbl);
  3010. s_p_get_string(&conf->srun_epilog, "SrunEpilog", hashtbl);
  3011. if (!s_p_get_string(&conf->state_save_location,
  3012. "StateSaveLocation", hashtbl))
  3013. conf->state_save_location = xstrdup(DEFAULT_SAVE_STATE_LOC);
  3014. s_p_get_string(&conf->suspend_exc_nodes, "SuspendExcNodes", hashtbl);
  3015. s_p_get_string(&conf->suspend_exc_parts, "SuspendExcParts", hashtbl);
  3016. s_p_get_string(&conf->suspend_program, "SuspendProgram", hashtbl);
  3017. if (!s_p_get_uint16(&conf->suspend_rate, "SuspendRate", hashtbl))
  3018. conf->suspend_rate = DEFAULT_SUSPEND_RATE;
  3019. if (s_p_get_long(&long_suspend_time, "SuspendTime", hashtbl)) {
  3020. if (long_suspend_time < -1) {
  3021. error("SuspendTime value (%ld) is less than -1",
  3022. long_suspend_time);
  3023. } else if ((long_suspend_time > -1) &&
  3024. (!strcmp(conf->select_type, "select/bluegene"))) {
  3025. fatal("SuspendTime (power save mode) incomptible with "
  3026. "select/bluegene");
  3027. } else
  3028. conf->suspend_time = long_suspend_time + 1;
  3029. } else
  3030. conf->suspend_time = 0;
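	/*
	 * Note on the stored encoding above: the configured SuspendTime is
	 * kept offset by one so that zero can mean "power saving disabled".
	 * For example, SuspendTime=-1 (like an absent keyword) is stored as
	 * conf->suspend_time == 0, while SuspendTime=600 is stored as 601.
	 */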
  3031. if (!s_p_get_uint16(&conf->suspend_timeout, "SuspendTimeout", hashtbl))
  3032. conf->suspend_timeout = DEFAULT_SUSPEND_TIMEOUT;
  3033. /* see above for switch_type, order dependent */
  3034. if (!s_p_get_string(&conf->task_plugin, "TaskPlugin", hashtbl))
  3035. conf->task_plugin = xstrdup(DEFAULT_TASK_PLUGIN);
  3036. if (s_p_get_string(&temp_str, "TaskPluginParam", hashtbl)) {
  3037. char *last = NULL, *tok;
  3038. bool set_mode = false, set_unit = false;
  3039. tok = strtok_r(temp_str, ",", &last);
  3040. while (tok) {
  3041. if (strcasecmp(tok, "none") == 0) {
  3042. if (set_unit)
  3043. fatal("Bad TaskPluginParam: %s", tok);
  3044. set_unit = true;
  3045. conf->task_plugin_param |= CPU_BIND_NONE;
  3046. } else if (strcasecmp(tok, "boards") == 0) {
  3047. if (set_unit)
  3048. fatal("Bad TaskPluginParam: %s", tok);
  3049. set_unit = true;
  3050. conf->task_plugin_param |= CPU_BIND_TO_BOARDS;
  3051. } else if (strcasecmp(tok, "sockets") == 0) {
  3052. if (set_unit)
  3053. fatal("Bad TaskPluginParam: %s", tok);
  3054. set_unit = true;
  3055. conf->task_plugin_param |= CPU_BIND_TO_SOCKETS;
  3056. } else if (strcasecmp(tok, "cores") == 0) {
  3057. if (set_unit)
  3058. fatal("Bad TaskPluginParam: %s", tok);
  3059. set_unit = true;
  3060. conf->task_plugin_param |= CPU_BIND_TO_CORES;
  3061. } else if (strcasecmp(tok, "threads") == 0) {
  3062. if (set_unit)
  3063. fatal("Bad TaskPluginParam: %s", tok);
  3064. set_unit = true;
  3065. conf->task_plugin_param |= CPU_BIND_TO_THREADS;
  3066. } else if (strcasecmp(tok, "cpusets") == 0) {
  3067. if (set_mode)
  3068. fatal("Bad TaskPluginParam: %s", tok);
  3069. set_mode = true;
  3070. conf->task_plugin_param |= CPU_BIND_CPUSETS;
  3071. } else if (strcasecmp(tok, "sched") == 0) {
  3072. if (set_mode)
  3073. fatal("Bad TaskPluginParam: %s", tok);
  3074. set_mode = true;
  3075. /* No change to task_plugin_param,
  3076. * this is the default */
  3077. } else if (strcasecmp(tok, "verbose") == 0) {
  3078. conf->task_plugin_param |= CPU_BIND_VERBOSE;
  3079. } else
  3080. fatal("Bad TaskPluginParam: %s", tok);
  3081. tok = strtok_r(NULL, ",", &last);
  3082. }
  3083. xfree(temp_str);
  3084. }
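	/*
	 * Illustrative TaskPluginParam strings for the parser above: at most
	 * one CPU-binding unit and one binding mode may be combined, plus
	 * the optional Verbose flag.
	 *
	 *   TaskPluginParam=Cores           -> CPU_BIND_TO_CORES
	 *   TaskPluginParam=Sockets,Verbose -> CPU_BIND_TO_SOCKETS|CPU_BIND_VERBOSE
	 *   TaskPluginParam=Cores,Threads   -> fatal(): two binding units
	 *   TaskPluginParam=Sched,Cpusets   -> fatal(): two binding modes
	 */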
  3085. s_p_get_string(&conf->task_epilog, "TaskEpilog", hashtbl);
  3086. s_p_get_string(&conf->task_prolog, "TaskProlog", hashtbl);
  3087. if (!s_p_get_string(&conf->tmp_fs, "TmpFS", hashtbl))
  3088. conf->tmp_fs = xstrdup(DEFAULT_TMP_FS);
  3089. if (!s_p_get_uint16(&conf->wait_time, "WaitTime", hashtbl))
  3090. conf->wait_time = DEFAULT_WAIT_TIME;
  3091. if (!s_p_get_string(&conf->topology_plugin, "TopologyPlugin", hashtbl))
  3092. conf->topology_plugin = xstrdup(DEFAULT_TOPOLOGY_PLUGIN);
  3093. if (s_p_get_uint16(&conf->tree_width, "TreeWidth", hashtbl)) {
  3094. if (conf->tree_width == 0) {
  3095. error("TreeWidth=0 is invalid");
  3096. conf->tree_width = DEFAULT_TREE_WIDTH;
  3097. }
  3098. } else {
  3099. conf->tree_width = DEFAULT_TREE_WIDTH;
  3100. }
  3101. if (s_p_get_boolean(&truth, "UsePAM", hashtbl) && truth) {
  3102. conf->use_pam = 1;
  3103. } else {
  3104. conf->use_pam = 0;
  3105. }
  3106. s_p_get_string(&conf->unkillable_program,
  3107. "UnkillableStepProgram", hashtbl);
  3108. if (!s_p_get_uint16(&conf->unkillable_timeout,
  3109. "UnkillableStepTimeout", hashtbl))
  3110. conf->unkillable_timeout = DEFAULT_UNKILLABLE_TIMEOUT;
  3111. s_p_get_uint16(&conf->vsize_factor, "VSizeFactor", hashtbl);
  3112. #ifdef HAVE_BG
  3113. if (conf->node_prefix == NULL)
  3114. fatal("No valid node name prefix identified");
  3115. #endif
  3116. xfree(default_storage_type);
  3117. xfree(default_storage_loc);
  3118. xfree(default_storage_host);
  3119. xfree(default_storage_user);
  3120. xfree(default_storage_pass);
  3121. }
  3122. /*
  3123. * Replace first "%h" in path string with NodeHostname.
  3124. * Replace first "%n" in path string with NodeName.
  3125. *
  3126. * NOTE: Caller should be holding slurm_conf_lock() when calling this function.
  3127. *
  3128. * Returns an xmalloc()ed string which the caller must free with xfree().
  3129. */
  3130. extern char *
  3131. slurm_conf_expand_slurmd_path(const char *path, const char *node_name)
  3132. {
  3133. char *hostname;
  3134. char *dir = NULL;
  3135. dir = xstrdup(path);
  3136. hostname = _internal_get_hostname(node_name);
  3137. xstrsubstitute(dir, "%h", hostname);
  3138. xfree(hostname);
  3139. xstrsubstitute(dir, "%n", node_name);
  3140. return dir;
  3141. }
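/*
 * Usage sketch for slurm_conf_expand_slurmd_path() with hypothetical node
 * values (NodeName=tux3, NodeHostname=node3); the caller holds
 * slurm_conf_lock() and releases the result with xfree():
 *
 *	char *dir;
 *	dir = slurm_conf_expand_slurmd_path("/var/spool/slurmd.%h", "tux3");
 *	// dir == "/var/spool/slurmd.node3"
 *	xfree(dir);
 *	dir = slurm_conf_expand_slurmd_path("/var/spool/slurmd.%n", "tux3");
 *	// dir == "/var/spool/slurmd.tux3"
 *	xfree(dir);
 */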
  3142. /*
  3143. * debug_flags2str - convert a DebugFlags uint32_t to the equivalent string
  3144. * Keep in sync with debug_str2flags() below
  3145. */
  3146. extern char * debug_flags2str(uint32_t debug_flags)
  3147. {
  3148. char *rc = NULL;
  3149. if (debug_flags & DEBUG_FLAG_BG_ALGO) {
  3150. if (rc)
  3151. xstrcat(rc, ",");
  3152. xstrcat(rc, "BGBlockAlgo");
  3153. }
  3154. if (debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) {
  3155. if (rc)
  3156. xstrcat(rc, ",");
  3157. xstrcat(rc, "BGBlockAlgoDeep");
  3158. }
  3159. if (debug_flags & DEBUG_FLAG_BACKFILL) {
  3160. if (rc)
  3161. xstrcat(rc, ",");
  3162. xstrcat(rc, "Backfill");
  3163. }
  3164. if (debug_flags & DEBUG_FLAG_BG_PICK) {
  3165. if (rc)
  3166. xstrcat(rc, ",");
  3167. xstrcat(rc, "BGBlockPick");
  3168. }
  3169. if (debug_flags & DEBUG_FLAG_BG_WIRES) {
  3170. if (rc)
  3171. xstrcat(rc, ",");
  3172. xstrcat(rc, "BGBlockWires");
  3173. }
  3174. if (debug_flags & DEBUG_FLAG_CPU_BIND) {
  3175. if (rc)
  3176. xstrcat(rc, ",");
  3177. xstrcat(rc, "CPU_Bind");
  3178. }
  3179. if (debug_flags & DEBUG_FLAG_ENERGY) {
  3180. if (rc)
  3181. xstrcat(rc, ",");
  3182. xstrcat(rc, "Energy");
  3183. }
  3184. if (debug_flags & DEBUG_FLAG_FRONT_END) {
  3185. if (rc)
  3186. xstrcat(rc, ",");
  3187. xstrcat(rc, "FrontEnd");
  3188. }
  3189. if (debug_flags & DEBUG_FLAG_GANG) {
  3190. if (rc)
  3191. xstrcat(rc, ",");
  3192. xstrcat(rc, "Gang");
  3193. }
  3194. if (debug_flags & DEBUG_FLAG_GRES) {
  3195. if (rc)
  3196. xstrcat(rc, ",");
  3197. xstrcat(rc, "Gres");
  3198. }
  3199. if (debug_flags & DEBUG_FLAG_NO_CONF_HASH) {
  3200. if (rc)
  3201. xstrcat(rc, ",");
  3202. xstrcat(rc, "NO_CONF_HASH");
  3203. }
  3204. if (debug_flags & DEBUG_FLAG_NO_REALTIME) {
  3205. if (rc)
  3206. xstrcat(rc, ",");
  3207. xstrcat(rc, "NoRealTime");
  3208. }
  3209. if (debug_flags & DEBUG_FLAG_PRIO) {
  3210. if (rc)
  3211. xstrcat(rc, ",");
  3212. xstrcat(rc, "Priority");
  3213. }
  3214. if (debug_flags & DEBUG_FLAG_RESERVATION) {
  3215. if (rc)
  3216. xstrcat(rc, ",");
  3217. xstrcat(rc, "Reservation");
  3218. }
  3219. if (debug_flags & DEBUG_FLAG_SELECT_TYPE) {
  3220. if (rc)
  3221. xstrcat(rc, ",");
  3222. xstrcat(rc, "SelectType");
  3223. }
  3224. if (debug_flags & DEBUG_FLAG_STEPS) {
  3225. if (rc)
  3226. xstrcat(rc, ",");
  3227. xstrcat(rc, "Steps");
  3228. }
  3229. if (debug_flags & DEBUG_FLAG_SWITCH) {
  3230. if (rc)
  3231. xstrcat(rc, ",");
  3232. xstrcat(rc, "Switch");
  3233. }
  3234. if (debug_flags & DEBUG_FLAG_TRIGGERS) {
  3235. if (rc)
  3236. xstrcat(rc, ",");
  3237. xstrcat(rc, "Triggers");
  3238. }
  3239. if (debug_flags & DEBUG_FLAG_WIKI) {
  3240. if (rc)
  3241. xstrcat(rc, ",");
  3242. xstrcat(rc, "Wiki");
  3243. }
  3244. return rc;
  3245. }
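/*
 * Example (illustrative): flags are emitted as a comma-separated list in
 * the fixed order of the tests above, and the caller owns the string:
 *
 *	char *str = debug_flags2str(DEBUG_FLAG_BACKFILL | DEBUG_FLAG_GRES);
 *	// str == "Backfill,Gres"
 *	xfree(str);
 */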
  3246. /*
  3247. * debug_str2flags - Convert a DebugFlags string to the equivalent uint32_t
3248. * Keep in sync with debug_flags2str() above
  3249. * Returns NO_VAL if invalid
  3250. */
  3251. extern uint32_t debug_str2flags(char *debug_flags)
  3252. {
  3253. uint32_t rc = 0;
  3254. char *tmp_str, *tok, *last = NULL;
  3255. if (!debug_flags)
  3256. return rc;
  3257. tmp_str = xstrdup(debug_flags);
  3258. tok = strtok_r(tmp_str, ",", &last);
  3259. while (tok) {
  3260. if (strcasecmp(tok, "Backfill") == 0)
  3261. rc |= DEBUG_FLAG_BACKFILL;
  3262. else if (strcasecmp(tok, "BGBlockAlgo") == 0)
  3263. rc |= DEBUG_FLAG_BG_ALGO;
  3264. else if (strcasecmp(tok, "BGBlockAlgoDeep") == 0)
  3265. rc |= DEBUG_FLAG_BG_ALGO_DEEP;
  3266. else if (strcasecmp(tok, "BGBlockPick") == 0)
  3267. rc |= DEBUG_FLAG_BG_PICK;
  3268. else if (strcasecmp(tok, "BGBlockWires") == 0)
  3269. rc |= DEBUG_FLAG_BG_WIRES;
  3270. else if (strcasecmp(tok, "CPU_Bind") == 0)
  3271. rc |= DEBUG_FLAG_CPU_BIND;
  3272. else if (strcasecmp(tok, "Energy") == 0)
  3273. rc |= DEBUG_FLAG_ENERGY;
  3274. else if (strcasecmp(tok, "FrontEnd") == 0)
  3275. rc |= DEBUG_FLAG_FRONT_END;
  3276. else if (strcasecmp(tok, "Gang") == 0)
  3277. rc |= DEBUG_FLAG_GANG;
  3278. else if (strcasecmp(tok, "Gres") == 0)
  3279. rc |= DEBUG_FLAG_GRES;
  3280. else if (strcasecmp(tok, "NO_CONF_HASH") == 0)
  3281. rc |= DEBUG_FLAG_NO_CONF_HASH;
  3282. else if (strcasecmp(tok, "NoRealTime") == 0)
  3283. rc |= DEBUG_FLAG_NO_REALTIME;
  3284. else if (strcasecmp(tok, "Priority") == 0)
  3285. rc |= DEBUG_FLAG_PRIO;
  3286. else if (strcasecmp(tok, "Reservation") == 0)
  3287. rc |= DEBUG_FLAG_RESERVATION;
  3288. else if (strcasecmp(tok, "SelectType") == 0)
  3289. rc |= DEBUG_FLAG_SELECT_TYPE;
  3290. else if (strcasecmp(tok, "Steps") == 0)
  3291. rc |= DEBUG_FLAG_STEPS;
  3292. else if (strcasecmp(tok, "Switch") == 0)
  3293. rc |= DEBUG_FLAG_SWITCH;
  3294. else if (strcasecmp(tok, "Trigger") == 0)
  3295. rc |= DEBUG_FLAG_TRIGGERS;
  3296. else if (strcasecmp(tok, "Triggers") == 0)
  3297. rc |= DEBUG_FLAG_TRIGGERS;
  3298. else if (strcasecmp(tok, "Wiki") == 0)
  3299. rc |= DEBUG_FLAG_WIKI;
  3300. else {
  3301. error("Invalid DebugFlag: %s", tok);
  3302. rc = NO_VAL;
  3303. break;
  3304. }
  3305. tok = strtok_r(NULL, ",", &last);
  3306. }
  3307. xfree(tmp_str);
  3308. return rc;
  3309. }
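/*
 * Example (illustrative): parsing is case-insensitive and any unrecognized
 * token invalidates the whole string:
 *
 *	debug_str2flags("backfill,GRES") == (DEBUG_FLAG_BACKFILL |
 *					     DEBUG_FLAG_GRES)
 *	debug_str2flags("Backfill,Bogus") == NO_VAL	(error logged)
 *	debug_str2flags(NULL) == 0
 */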
  3310. /*
3311. * reconfig_flags2str - convert a ReconfigFlags uint16_t to the equivalent string
  3312. * Keep in sync with reconfig_str2flags() below
  3313. */
  3314. extern char * reconfig_flags2str(uint16_t reconfig_flags)
  3315. {
  3316. char *rc = NULL;
  3317. if (reconfig_flags & RECONFIG_KEEP_PART_INFO) {
  3318. if (rc)
  3319. xstrcat(rc, ",");
  3320. xstrcat(rc, "KeepPartInfo");
  3321. }
  3322. if (reconfig_flags & RECONFIG_KEEP_PART_STAT) {
  3323. if (rc)
  3324. xstrcat(rc, ",");
  3325. xstrcat(rc, "KeepPartState");
  3326. }
  3327. return rc;
  3328. }
  3329. /*
3330. * reconfig_str2flags - Convert a ReconfigFlags string to the equivalent uint16_t
  3331. * Keep in sync with reconfig_flags2str() above
  3332. * Returns NO_VAL if invalid
  3333. */
  3334. extern uint16_t reconfig_str2flags(char *reconfig_flags)
  3335. {
  3336. uint16_t rc = 0;
  3337. char *tmp_str, *tok, *last = NULL;
  3338. if (!reconfig_flags)
  3339. return rc;
  3340. tmp_str = xstrdup(reconfig_flags);
  3341. tok = strtok_r(tmp_str, ",", &last);
  3342. while (tok) {
  3343. if (strcasecmp(tok, "KeepPartInfo") == 0)
  3344. rc |= RECONFIG_KEEP_PART_INFO;
  3345. else if (strcasecmp(tok, "KeepPartState") == 0)
  3346. rc |= RECONFIG_KEEP_PART_STAT;
  3347. else {
  3348. error("Invalid ReconfigFlag: %s", tok);
  3349. rc = (uint16_t) NO_VAL;
  3350. break;
  3351. }
  3352. tok = strtok_r(NULL, ",", &last);
  3353. }
  3354. xfree(tmp_str);
  3355. return rc;
  3356. }
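/*
 * Example (illustrative) round trip through the two ReconfigFlags helpers:
 *
 *	uint16_t flags = reconfig_str2flags("KeepPartInfo,KeepPartState");
 *	// flags == (RECONFIG_KEEP_PART_INFO | RECONFIG_KEEP_PART_STAT)
 *	char *str = reconfig_flags2str(flags);
 *	// str == "KeepPartInfo,KeepPartState"
 *	xfree(str);
 */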
  3357. extern void destroy_config_key_pair(void *object)
  3358. {
  3359. config_key_pair_t *key_pair_ptr = (config_key_pair_t *)object;
3360. if (key_pair_ptr) {
  3361. xfree(key_pair_ptr->name);
  3362. xfree(key_pair_ptr->value);
  3363. xfree(key_pair_ptr);
  3364. }
  3365. }
  3366. extern void pack_config_key_pair(void *in, uint16_t rpc_version, Buf buffer)
  3367. {
  3368. config_key_pair_t *object = (config_key_pair_t *)in;
  3369. packstr(object->name, buffer);
  3370. packstr(object->value, buffer);
  3371. }
  3372. extern int unpack_config_key_pair(void **object, uint16_t rpc_version,
  3373. Buf buffer)
  3374. {
  3375. uint32_t uint32_tmp;
  3376. config_key_pair_t *object_ptr = xmalloc(sizeof(config_key_pair_t));
  3377. *object = object_ptr;
  3378. safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer);
  3379. safe_unpackstr_xmalloc(&object_ptr->value, &uint32_tmp, buffer);
  3380. return SLURM_SUCCESS;
  3381. unpack_error:
  3382. destroy_config_key_pair(object_ptr);
  3383. *object = NULL;
  3384. return SLURM_ERROR;
  3385. }
  3386. extern int sort_key_pairs(config_key_pair_t *key_a, config_key_pair_t *key_b)
  3387. {
  3388. int size_a = strcmp(key_a->name, key_b->name);
  3389. if (size_a < 0)
  3390. return -1;
  3391. else if (size_a > 0)
  3392. return 1;
  3393. return 0;
  3394. }
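/*
 * Usage sketch: sort_key_pairs() is a strcmp()-style comparator, so a list
 * of config_key_pair_t records can be ordered alphabetically by name,
 * assuming the generic list API (list_sort()/ListCmpF) from
 * src/common/list.h:
 *
 *	list_sort(key_pair_list, (ListCmpF) sort_key_pairs);
 */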