
/src/common/read_config.c

https://github.com/cfenoy/slurm
C | 3796 lines | 3096 code | 402 blank | 298 comment | 860 complexity | 3b5a181d3bc89b4cb7479660715e6b7c MD5
Possible License(s): GPL-2.0, AGPL-1.0


  1. /*****************************************************************************\
  2. * read_config.c - read the overall slurm configuration file
  3. *****************************************************************************
  4. * Copyright (C) 2002-2007 The Regents of the University of California.
  5. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  6. * Portions Copyright (C) 2008 Vijay Ramasubramanian.
  7. * Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>.
  8. * Portions (boards) copyright (C) 2012 Bull, <rod.schultz@bull.com>
  9. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  10. * Written by Morris Jette <jette1@llnl.gov>.
  11. * CODE-OCEC-09-009. All rights reserved.
  12. *
  13. * This file is part of SLURM, a resource management program.
  14. * For details, see <http://www.schedmd.com/slurmdocs/>.
  15. * Please also read the included file: DISCLAIMER.
  16. *
  17. * SLURM is free software; you can redistribute it and/or modify it under
  18. * the terms of the GNU General Public License as published by the Free
  19. * Software Foundation; either version 2 of the License, or (at your option)
  20. * any later version.
  21. *
  22. * In addition, as a special exception, the copyright holders give permission
  23. * to link the code of portions of this program with the OpenSSL library under
  24. * certain conditions as described in each individual source file, and
  25. * distribute linked combinations including the two. You must obey the GNU
  26. * General Public License in all respects for all of the code used other than
  27. * OpenSSL. If you modify file(s) with this exception, you may extend this
  28. * exception to your version of the file(s), but you are not obligated to do
  29. * so. If you do not wish to do so, delete this exception statement from your
  30. * version. If you delete this exception statement from all source files in
  31. * the program, then also delete it here.
  32. *
  33. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  34. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  35. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  36. * details.
  37. *
  38. * You should have received a copy of the GNU General Public License along
  39. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  40. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  41. \*****************************************************************************/
  42. #ifdef HAVE_CONFIG_H
  43. # include "config.h"
  44. #endif
  45. #include <arpa/inet.h>
  46. #include <assert.h>
  47. #include <ctype.h>
  48. #include <errno.h>
  49. #include <limits.h>
  50. #include <netdb.h>
  51. #include <netinet/in.h>
  52. #include <pthread.h>
  53. #include <pwd.h>
  54. #include <stdio.h>
  55. #include <stdlib.h>
  56. #include <string.h>
  57. #include <sys/socket.h>
  58. #include <sys/stat.h>
  59. #include <sys/types.h>
  60. #include <time.h>
  61. #include <unistd.h>
  62. #include "slurm/slurm.h"
  63. #include "src/common/hostlist.h"
  64. #include "src/common/log.h"
  65. #include "src/common/macros.h"
  66. #include "src/common/node_conf.h"
  67. #include "src/common/parse_config.h"
  68. #include "src/common/parse_spec.h"
  69. #include "src/common/parse_time.h"
  70. #include "src/common/read_config.h"
  71. #include "src/common/slurm_accounting_storage.h"
  72. #include "src/common/slurm_protocol_api.h"
  73. #include "src/common/slurm_protocol_defs.h"
  74. #include "src/common/slurm_rlimits_info.h"
  75. #include "src/common/slurm_selecttype_info.h"
  76. #include "src/common/strlcpy.h"
  77. #include "src/common/uid.h"
  78. #include "src/common/util-net.h"
  79. #include "src/common/xmalloc.h"
  80. #include "src/common/xstring.h"
  81. /*
  82. ** Define slurm-specific aliases for use by plugins, see slurm_xlator.h
  83. ** for details.
  84. */
  85. strong_alias(destroy_config_key_pair, slurm_destroy_config_key_pair);
  86. strong_alias(sort_key_pairs, slurm_sort_key_pairs);
  87. /* Instantiation of the "extern slurm_ctl_conf_t slurmctld_conf"
  88. * found in slurmctld.h */
  89. slurm_ctl_conf_t slurmctld_conf;
  90. static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;
  91. static s_p_hashtbl_t *conf_hashtbl = NULL;
  92. static slurm_ctl_conf_t *conf_ptr = &slurmctld_conf;
  93. static bool conf_initialized = false;
  94. static s_p_hashtbl_t *default_frontend_tbl;
  95. static s_p_hashtbl_t *default_nodename_tbl;
  96. static s_p_hashtbl_t *default_partition_tbl;
  97. inline static void _normalize_debug_level(uint16_t *level);
  98. static void _init_slurm_conf(const char *file_name);
  99. #define NAME_HASH_LEN 512
  100. typedef struct names_ll_s {
  101. char *alias; /* NodeName */
  102. char *hostname; /* NodeHostname */
  103. char *address; /* NodeAddr */
  104. uint16_t port;
  105. uint16_t cpus;
  106. uint16_t boards;
  107. uint16_t sockets;
  108. uint16_t cores;
  109. uint16_t threads;
  110. slurm_addr_t addr;
  111. bool addr_initialized;
  112. struct names_ll_s *next_alias;
  113. struct names_ll_s *next_hostname;
  114. } names_ll_t;
  115. static bool nodehash_initialized = false;
  116. static names_ll_t *host_to_node_hashtbl[NAME_HASH_LEN] = {NULL};
  117. static names_ll_t *node_to_host_hashtbl[NAME_HASH_LEN] = {NULL};
  118. static void _destroy_nodename(void *ptr);
  119. static int _parse_frontend(void **dest, slurm_parser_enum_t type,
  120. const char *key, const char *value,
  121. const char *line, char **leftover);
  122. static int _parse_nodename(void **dest, slurm_parser_enum_t type,
  123. const char *key, const char *value,
  124. const char *line, char **leftover);
  125. static bool _is_valid_path(char *path, char *msg);
  126. static int _parse_partitionname(void **dest, slurm_parser_enum_t type,
  127. const char *key, const char *value,
  128. const char *line, char **leftover);
  129. static void _destroy_partitionname(void *ptr);
  130. static int _parse_downnodes(void **dest, slurm_parser_enum_t type,
  131. const char *key, const char *value,
  132. const char *line, char **leftover);
  133. static void _destroy_downnodes(void *ptr);
  134. static int _defunct_option(void **dest, slurm_parser_enum_t type,
  135. const char *key, const char *value,
  136. const char *line, char **leftover);
  137. static void _validate_and_set_defaults(slurm_ctl_conf_t *conf,
  138. s_p_hashtbl_t *hashtbl);
  139. s_p_options_t slurm_conf_options[] = {
  140. {"AccountingStorageEnforce", S_P_STRING},
  141. {"AccountingStorageHost", S_P_STRING},
  142. {"AccountingStorageBackupHost", S_P_STRING},
  143. {"AccountingStorageLoc", S_P_STRING},
  144. {"AccountingStoragePass", S_P_STRING},
  145. {"AccountingStoragePort", S_P_UINT32},
  146. {"AccountingStorageType", S_P_STRING},
  147. {"AccountingStorageUser", S_P_STRING},
  148. {"AccountingStoreJobComment", S_P_BOOLEAN},
  149. {"AcctGatherEnergyType", S_P_STRING},
  150. {"AcctGatherNodeFreq", S_P_UINT16},
  151. {"AuthType", S_P_STRING},
  152. {"BackupAddr", S_P_STRING},
  153. {"BackupController", S_P_STRING},
  154. {"BatchStartTimeout", S_P_UINT16},
  155. {"CheckpointType", S_P_STRING},
  156. {"CacheGroups", S_P_UINT16},
  157. {"ClusterName", S_P_STRING},
  158. {"CompleteWait", S_P_UINT16},
  159. {"ControlAddr", S_P_STRING},
  160. {"ControlMachine", S_P_STRING},
  161. {"CryptoType", S_P_STRING},
  162. {"DebugFlags", S_P_STRING},
  163. {"DefaultStorageHost", S_P_STRING},
  164. {"DefaultStorageLoc", S_P_STRING},
  165. {"DefaultStoragePass", S_P_STRING},
  166. {"DefaultStoragePort", S_P_UINT32},
  167. {"DefaultStorageType", S_P_STRING},
  168. {"DefaultStorageUser", S_P_STRING},
  169. {"DefMemPerCPU", S_P_UINT32},
  170. {"DefMemPerNode", S_P_UINT32},
  171. {"DisableRootJobs", S_P_BOOLEAN},
  172. {"EnforcePartLimits", S_P_BOOLEAN},
  173. {"Epilog", S_P_STRING},
  174. {"EpilogMsgTime", S_P_UINT32},
  175. {"EpilogSlurmctld", S_P_STRING},
  176. {"FastSchedule", S_P_UINT16},
  177. {"FirstJobId", S_P_UINT32},
  178. {"GetEnvTimeout", S_P_UINT16},
  179. {"GresTypes", S_P_STRING},
  180. {"GroupUpdateForce", S_P_UINT16},
  181. {"GroupUpdateTime", S_P_UINT16},
  182. {"HealthCheckInterval", S_P_UINT16},
  183. {"HealthCheckProgram", S_P_STRING},
  184. {"InactiveLimit", S_P_UINT16},
  185. {"JobAcctGatherType", S_P_STRING},
  186. {"JobAcctGatherFrequency", S_P_UINT16},
  187. {"JobCheckpointDir", S_P_STRING},
  188. {"JobCompHost", S_P_STRING},
  189. {"JobCompLoc", S_P_STRING},
  190. {"JobCompPass", S_P_STRING},
  191. {"JobCompPort", S_P_UINT32},
  192. {"JobCompType", S_P_STRING},
  193. {"JobCompUser", S_P_STRING},
  194. {"JobCredentialPrivateKey", S_P_STRING},
  195. {"JobCredentialPublicCertificate", S_P_STRING},
  196. {"JobFileAppend", S_P_UINT16},
  197. {"JobRequeue", S_P_UINT16},
  198. {"JobSubmitPlugins", S_P_STRING},
  199. {"KillOnBadExit", S_P_UINT16},
  200. {"KillWait", S_P_UINT16},
  201. {"LaunchType", S_P_STRING},
  202. {"Licenses", S_P_STRING},
  203. {"MailProg", S_P_STRING},
  204. {"MaxJobCount", S_P_UINT32},
  205. {"MaxJobId", S_P_UINT32},
  206. {"MaxMemPerCPU", S_P_UINT32},
  207. {"MaxMemPerNode", S_P_UINT32},
  208. {"MaxStepCount", S_P_UINT32},
  209. {"MaxTasksPerNode", S_P_UINT16},
  210. {"MessageTimeout", S_P_UINT16},
  211. {"MinJobAge", S_P_UINT16},
  212. {"MpiDefault", S_P_STRING},
  213. {"MpiParams", S_P_STRING},
  214. {"OverTimeLimit", S_P_UINT16},
  215. {"PluginDir", S_P_STRING},
  216. {"PlugStackConfig", S_P_STRING},
  217. {"PreemptMode", S_P_STRING},
  218. {"PreemptType", S_P_STRING},
  219. {"PriorityDecayHalfLife", S_P_STRING},
  220. {"PriorityCalcPeriod", S_P_STRING},
  221. {"PriorityFavorSmall", S_P_BOOLEAN},
  222. {"PriorityMaxAge", S_P_STRING},
  223. {"PriorityUsageResetPeriod", S_P_STRING},
  224. {"PriorityType", S_P_STRING},
  225. {"PriorityFlags", S_P_STRING},
  226. {"PriorityWeightAge", S_P_UINT32},
  227. {"PriorityWeightFairshare", S_P_UINT32},
  228. {"PriorityWeightJobSize", S_P_UINT32},
  229. {"PriorityWeightPartition", S_P_UINT32},
  230. {"PriorityWeightQOS", S_P_UINT32},
  231. {"PrivateData", S_P_STRING},
  232. {"ProctrackType", S_P_STRING},
  233. {"Prolog", S_P_STRING},
  234. {"PrologSlurmctld", S_P_STRING},
  235. {"PropagatePrioProcess", S_P_UINT16},
  236. {"PropagateResourceLimitsExcept", S_P_STRING},
  237. {"PropagateResourceLimits", S_P_STRING},
  238. {"RebootProgram", S_P_STRING},
  239. {"ReconfigFlags", S_P_STRING},
  240. {"ResumeProgram", S_P_STRING},
  241. {"ResumeRate", S_P_UINT16},
  242. {"ResumeTimeout", S_P_UINT16},
  243. {"ResvOverRun", S_P_UINT16},
  244. {"ReturnToService", S_P_UINT16},
  245. {"SallocDefaultCommand", S_P_STRING},
  246. {"SchedulerAuth", S_P_STRING, _defunct_option},
  247. {"SchedulerParameters", S_P_STRING},
  248. {"SchedulerPort", S_P_UINT16},
  249. {"SchedulerRootFilter", S_P_UINT16},
  250. {"SchedulerTimeSlice", S_P_UINT16},
  251. {"SchedulerType", S_P_STRING},
  252. {"SelectType", S_P_STRING},
  253. {"SelectTypeParameters", S_P_STRING},
  254. {"SlurmUser", S_P_STRING},
  255. {"SlurmdUser", S_P_STRING},
  256. {"SlurmctldDebug", S_P_STRING},
  257. {"SlurmctldLogFile", S_P_STRING},
  258. {"SlurmctldPidFile", S_P_STRING},
  259. {"SlurmctldPort", S_P_STRING},
  260. {"SlurmctldTimeout", S_P_UINT16},
  261. {"SlurmdDebug", S_P_STRING},
  262. {"SlurmdLogFile", S_P_STRING},
  263. {"SlurmdPidFile", S_P_STRING},
  264. {"SlurmdPort", S_P_UINT32},
  265. {"SlurmdSpoolDir", S_P_STRING},
  266. {"SlurmdTimeout", S_P_UINT16},
  267. {"SlurmSchedLogFile", S_P_STRING},
  268. {"SlurmSchedLogLevel", S_P_UINT16},
  269. {"SrunEpilog", S_P_STRING},
  270. {"SrunProlog", S_P_STRING},
  271. {"StateSaveLocation", S_P_STRING},
  272. {"SuspendExcNodes", S_P_STRING},
  273. {"SuspendExcParts", S_P_STRING},
  274. {"SuspendProgram", S_P_STRING},
  275. {"SuspendRate", S_P_UINT16},
  276. {"SuspendTime", S_P_LONG},
  277. {"SuspendTimeout", S_P_UINT16},
  278. {"SwitchType", S_P_STRING},
  279. {"TaskEpilog", S_P_STRING},
  280. {"TaskProlog", S_P_STRING},
  281. {"TaskPlugin", S_P_STRING},
  282. {"TaskPluginParam", S_P_STRING},
  283. {"TmpFS", S_P_STRING},
  284. {"TopologyPlugin", S_P_STRING},
  285. {"TrackWCKey", S_P_BOOLEAN},
  286. {"TreeWidth", S_P_UINT16},
  287. {"UnkillableStepProgram", S_P_STRING},
  288. {"UnkillableStepTimeout", S_P_UINT16},
  289. {"UsePAM", S_P_BOOLEAN},
  290. {"VSizeFactor", S_P_UINT16},
  291. {"WaitTime", S_P_UINT16},
  292. {"FrontendName", S_P_ARRAY, _parse_frontend, destroy_frontend},
  293. {"NodeName", S_P_ARRAY, _parse_nodename, _destroy_nodename},
  294. {"PartitionName", S_P_ARRAY, _parse_partitionname,
  295. _destroy_partitionname},
  296. {"DownNodes", S_P_ARRAY, _parse_downnodes, _destroy_downnodes},
  297. {NULL}
  298. };
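
Each entry in slurm_conf_options[] binds a slurm.conf keyword to the s_p_* type it is parsed as, or to a per-record handler for the array options. As a hedged illustration only, a minimal slurm.conf fragment exercising a few of these keys might look like the sketch below; the host names, counts, and paths are invented for the example.

/*
 * Illustrative slurm.conf fragment (hypothetical values, editor's example,
 * not part of this repository):
 *
 *   ControlMachine=ctl0            # S_P_STRING
 *   SlurmdPort=6818                # S_P_UINT32
 *   SlurmctldTimeout=120           # S_P_UINT16
 *   AccountingStoreJobComment=YES  # S_P_BOOLEAN
 *   NodeName=tux[1-4] CPUs=8 State=UNKNOWN        # handled by _parse_nodename
 *   PartitionName=debug Nodes=tux[1-4] Default=YES MaxTime=INFINITE
 */
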
  299. static bool _is_valid_path (char *path, char *msg)
  300. {
  301. /*
  302. * Allocate temporary space for walking the list of dirs:
  303. */
  304. int pathlen = strlen (path);
  305. char *buf = xmalloc (pathlen + 2);
  306. char *p, *entry;
  307. if (strlcpy (buf, path, pathlen + 1) > pathlen + 1) {
  308. error ("is_valid_path: Failed to copy path!");
  309. goto out_false;
  310. }
  311. /*
  312. * Ensure the path ends with a ':'
  313. */
  314. if (buf [pathlen - 1] != ':') {
  315. buf [pathlen] = ':';
  316. buf [pathlen + 1] = '\0';
  317. }
  318. entry = buf;
  319. while ((p = strchr (entry, ':'))) {
  320. struct stat st;
  321. /*
  322. * NUL terminate colon and advance p
  323. */
  324. *(p++) = '\0';
  325. /*
  326. * Check to see if current path element is a valid dir
  327. */
  328. if (stat (entry, &st) < 0) {
  329. error ("%s: %s: %m", msg, entry);
  330. goto out_false;
  331. }
  332. else if (!S_ISDIR (st.st_mode)) {
  333. error ("%s: %s: Not a directory", msg, entry);
  334. goto out_false;
  335. }
  336. /*
  337. * Otherwise path element is valid, continue..
  338. */
  339. entry = p;
  340. }
  341. xfree (buf);
  342. return true;
  343. out_false:
  344. xfree (buf);
  345. return false;
  346. }
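
_is_valid_path() walks its argument as a colon-separated list and requires every element to stat() as a directory. A minimal usage sketch follows; the wrapper function and directory names are hypothetical, and the option it guards (apparently PluginDir, validated later in the truncated portion of this file) is assumed rather than shown here.

/* Editor's usage sketch only -- not part of the original read_config.c. */
static void _example_check_plugindir(void)
{
	/* hypothetical colon-separated directory list */
	char plugindir[] = "/usr/lib64/slurm:/opt/slurm/lib";

	/* every element must exist and be a directory, otherwise false */
	if (!_is_valid_path(plugindir, "PluginDir"))
		error("Bad PluginDir value: %s", plugindir);
}
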
  347. static int _defunct_option(void **dest, slurm_parser_enum_t type,
  348. const char *key, const char *value,
  349. const char *line, char **leftover)
  350. {
  351. error("The option \"%s\" is defunct, see man slurm.conf.", key);
  352. return 0;
  353. }
  354. #if (SYSTEM_DIMENSIONS > 1)
  355. /* Used to get the general name of the machine, used primarily
  356. * for BlueGene systems. Not in general use because some systems
  357. * have multiple prefixes such as foo[1-1000],bar[1-1000].
  358. */
  359. /* Caller must be holding slurm_conf_lock() */
  360. static void _set_node_prefix(const char *nodenames)
  361. {
  362. int i;
  363. char *tmp;
  364. xassert(nodenames != NULL);
  365. for (i = 1; nodenames[i] != '\0'; i++) {
  366. if((nodenames[i-1] == '[')
  367. || (nodenames[i-1] <= '9'
  368. && nodenames[i-1] >= '0'))
  369. break;
  370. }
  371. if(i == 1) {
  372. error("In your Node definition in your slurm.conf you "
  373. "gave a nodelist '%s' without a prefix. "
  374. "Please try something like bg%s.", nodenames, nodenames);
  375. }
  376. xfree(conf_ptr->node_prefix);
  377. if(nodenames[i] == '\0')
  378. conf_ptr->node_prefix = xstrdup(nodenames);
  379. else {
  380. tmp = xmalloc(sizeof(char)*i+1);
  381. memset(tmp, 0, i+1);
  382. snprintf(tmp, i, "%s", nodenames);
  383. conf_ptr->node_prefix = tmp;
  384. tmp = NULL;
  385. }
  386. debug3("Prefix is %s %s %d", conf_ptr->node_prefix, nodenames, i);
  387. }
  388. #endif /* SYSTEM_DIMENSIONS > 1 */
  389. static int _parse_frontend(void **dest, slurm_parser_enum_t type,
  390. const char *key, const char *value,
  391. const char *line, char **leftover)
  392. {
  393. s_p_hashtbl_t *tbl, *dflt;
  394. slurm_conf_frontend_t *n;
  395. char *node_state = NULL;
  396. static s_p_options_t _frontend_options[] = {
  397. {"FrontendAddr", S_P_STRING},
  398. {"Port", S_P_UINT16},
  399. {"Reason", S_P_STRING},
  400. {"State", S_P_STRING},
  401. {NULL}
  402. };
  403. #ifndef HAVE_FRONT_END
  404. fatal("Use of FrontendName in slurm.conf without SLURM being "
  405. "configured/built with the --enable-front-end option");
  406. #endif
  407. tbl = s_p_hashtbl_create(_frontend_options);
  408. s_p_parse_line(tbl, *leftover, leftover);
  409. /* s_p_dump_values(tbl, _frontend_options); */
  410. if (strcasecmp(value, "DEFAULT") == 0) {
  411. char *tmp;
  412. if (s_p_get_string(&tmp, "FrontendAddr", tbl)) {
  413. error("FrontendAddr not allowed with "
  414. "FrontendName=DEFAULT");
  415. xfree(tmp);
  416. s_p_hashtbl_destroy(tbl);
  417. return -1;
  418. }
  419. if (default_frontend_tbl != NULL) {
  420. s_p_hashtbl_merge(tbl, default_frontend_tbl);
  421. s_p_hashtbl_destroy(default_frontend_tbl);
  422. }
  423. default_frontend_tbl = tbl;
  424. return 0;
  425. } else {
  426. n = xmalloc(sizeof(slurm_conf_frontend_t));
  427. dflt = default_frontend_tbl;
  428. n->frontends = xstrdup(value);
  429. if (!s_p_get_string(&n->addresses, "FrontendAddr", tbl))
  430. n->addresses = xstrdup(n->frontends);
  431. if (!s_p_get_uint16(&n->port, "Port", tbl) &&
  432. !s_p_get_uint16(&n->port, "Port", dflt)) {
  433. /* This gets resolved in slurm_conf_get_port()
  434. * and slurm_conf_get_addr(). For now just
  435. * leave with a value of zero */
  436. n->port = 0;
  437. }
  438. if (!s_p_get_string(&n->reason, "Reason", tbl))
  439. s_p_get_string(&n->reason, "Reason", dflt);
  440. if (!s_p_get_string(&node_state, "State", tbl) &&
  441. !s_p_get_string(&node_state, "State", dflt)) {
  442. n->node_state = NODE_STATE_UNKNOWN;
  443. } else {
  444. n->node_state = state_str2int(node_state,
  445. (char *) value);
  446. if (n->node_state == (uint16_t) NO_VAL)
  447. n->node_state = NODE_STATE_UNKNOWN;
  448. xfree(node_state);
  449. }
  450. *dest = (void *)n;
  451. s_p_hashtbl_destroy(tbl);
  452. return 1;
  453. }
  454. /* should not get here */
  455. }
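
To make the DEFAULT handling above concrete, here is a hedged, purely illustrative pair of FrontendName lines with hypothetical hosts. Fields omitted from a later record, such as Port, Reason, and State, fall back to the DEFAULT record; FrontendAddr may not appear on the DEFAULT record and otherwise defaults to the FrontendName value itself.

/*
 * Illustrative slurm.conf lines (hypothetical, editor's example):
 *
 *   FrontendName=DEFAULT Port=6818 State=UNKNOWN
 *   FrontendName=fe[0-1] FrontendAddr=fe-ib[0-1]
 *
 * The second record inherits Port=6818 and State=UNKNOWN from DEFAULT.
 */
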
  456. static int _parse_nodename(void **dest, slurm_parser_enum_t type,
  457. const char *key, const char *value,
  458. const char *line, char **leftover)
  459. {
  460. s_p_hashtbl_t *tbl, *dflt;
  461. slurm_conf_node_t *n;
  462. int computed_procs;
  463. static s_p_options_t _nodename_options[] = {
  464. {"Boards", S_P_UINT16},
  465. {"CoresPerSocket", S_P_UINT16},
  466. {"CPUs", S_P_UINT16},
  467. {"Feature", S_P_STRING},
  468. {"Gres", S_P_STRING},
  469. {"NodeAddr", S_P_STRING},
  470. {"NodeHostname", S_P_STRING},
  471. {"Port", S_P_STRING},
  472. {"Procs", S_P_UINT16},
  473. {"RealMemory", S_P_UINT32},
  474. {"Reason", S_P_STRING},
  475. {"Sockets", S_P_UINT16},
  476. {"SocketsPerBoard", S_P_UINT16},
  477. {"State", S_P_STRING},
  478. {"ThreadsPerCore", S_P_UINT16},
  479. {"TmpDisk", S_P_UINT32},
  480. {"Weight", S_P_UINT32},
  481. {NULL}
  482. };
  483. tbl = s_p_hashtbl_create(_nodename_options);
  484. s_p_parse_line(tbl, *leftover, leftover);
  485. /* s_p_dump_values(tbl, _nodename_options); */
  486. if (strcasecmp(value, "DEFAULT") == 0) {
  487. char *tmp;
  488. if (s_p_get_string(&tmp, "NodeHostname", tbl)) {
  489. error("NodeHostname not allowed with "
  490. "NodeName=DEFAULT");
  491. xfree(tmp);
  492. s_p_hashtbl_destroy(tbl);
  493. return -1;
  494. }
  495. if (s_p_get_string(&tmp, "NodeAddr", tbl)) {
  496. error("NodeAddr not allowed with NodeName=DEFAULT");
  497. xfree(tmp);
  498. s_p_hashtbl_destroy(tbl);
  499. return -1;
  500. }
  501. if (default_nodename_tbl != NULL) {
  502. s_p_hashtbl_merge(tbl, default_nodename_tbl);
  503. s_p_hashtbl_destroy(default_nodename_tbl);
  504. }
  505. default_nodename_tbl = tbl;
  506. return 0;
  507. } else {
  508. bool no_cpus = false;
  509. bool no_boards = false;
  510. bool no_sockets = false;
  511. bool no_cores = false;
  512. bool no_threads = false;
  513. bool no_sockets_per_board = false;
  514. uint16_t sockets_per_board = 0;
  515. n = xmalloc(sizeof(slurm_conf_node_t));
  516. dflt = default_nodename_tbl;
  517. n->nodenames = xstrdup(value);
  518. #if (SYSTEM_DIMENSIONS > 1)
  519. if (conf_ptr->node_prefix == NULL)
  520. _set_node_prefix(n->nodenames);
  521. #endif
  522. if (!s_p_get_string(&n->hostnames, "NodeHostname", tbl))
  523. n->hostnames = xstrdup(n->nodenames);
  524. if (!s_p_get_string(&n->addresses, "NodeAddr", tbl))
  525. n->addresses = xstrdup(n->hostnames);
  526. if (!s_p_get_uint16(&n->boards, "Boards", tbl)
  527. && !s_p_get_uint16(&n->boards, "Boards", dflt)) {
  528. n->boards = 1;
  529. no_boards = true;
  530. }
  531. if (!s_p_get_uint16(&n->cores, "CoresPerSocket", tbl)
  532. && !s_p_get_uint16(&n->cores, "CoresPerSocket", dflt)) {
  533. n->cores = 1;
  534. no_cores = true;
  535. }
  536. if (!s_p_get_string(&n->feature, "Feature", tbl))
  537. s_p_get_string(&n->feature, "Feature", dflt);
  538. if (!s_p_get_string(&n->gres, "Gres", tbl))
  539. s_p_get_string(&n->gres, "Gres", dflt);
  540. if (!s_p_get_string(&n->port_str, "Port", tbl) &&
  541. !s_p_get_string(&n->port_str, "Port", dflt)) {
  542. /* This gets resolved in slurm_conf_get_port()
  543. * and slurm_conf_get_addr(). For now just
  544. * leave with a value of NULL */
  545. }
  546. if (!s_p_get_uint16(&n->cpus, "CPUs", tbl) &&
  547. !s_p_get_uint16(&n->cpus, "CPUs", dflt) &&
  548. !s_p_get_uint16(&n->cpus, "Procs", tbl) &&
  549. !s_p_get_uint16(&n->cpus, "Procs", dflt)) {
  550. n->cpus = 1;
  551. no_cpus = true;
  552. }
  553. if (!s_p_get_uint32(&n->real_memory, "RealMemory", tbl)
  554. && !s_p_get_uint32(&n->real_memory, "RealMemory", dflt))
  555. n->real_memory = 1;
  556. if (!s_p_get_string(&n->reason, "Reason", tbl))
  557. s_p_get_string(&n->reason, "Reason", dflt);
  558. if (!s_p_get_uint16(&n->sockets, "Sockets", tbl)
  559. && !s_p_get_uint16(&n->sockets, "Sockets", dflt)) {
  560. n->sockets = 1;
  561. no_sockets = true;
  562. }
  563. if (!s_p_get_uint16(&sockets_per_board, "SocketsPerBoard", tbl)
  564. && !s_p_get_uint16(&sockets_per_board, "SocketsPerBoard",
  565. dflt)) {
  566. sockets_per_board = 1;
  567. no_sockets_per_board = true;
  568. }
  569. if (!s_p_get_string(&n->state, "State", tbl)
  570. && !s_p_get_string(&n->state, "State", dflt))
  571. n->state = NULL;
  572. if (!s_p_get_uint16(&n->threads, "ThreadsPerCore", tbl)
  573. && !s_p_get_uint16(&n->threads, "ThreadsPerCore", dflt)) {
  574. n->threads = 1;
  575. no_threads = true;
  576. }
  577. if (!s_p_get_uint32(&n->tmp_disk, "TmpDisk", tbl)
  578. && !s_p_get_uint32(&n->tmp_disk, "TmpDisk", dflt))
  579. n->tmp_disk = 0;
  580. if (!s_p_get_uint32(&n->weight, "Weight", tbl)
  581. && !s_p_get_uint32(&n->weight, "Weight", dflt))
  582. n->weight = 1;
  583. s_p_hashtbl_destroy(tbl);
  584. if (n->cores == 0) { /* make sure cores is non-zero */
  585. error("NodeNames=%s CoresPerSocket=0 is invalid, "
  586. "reset to 1", n->nodenames);
  587. n->cores = 1;
  588. }
  589. if (n->threads == 0) { /* make sure threads is non-zero */
  590. error("NodeNames=%s ThreadsPerCore=0 is invalid, "
  591. "reset to 1", n->nodenames);
  592. n->threads = 1;
  593. }
  594. if (!no_sockets_per_board && sockets_per_board==0) {
  595. /* make sure sockets_per_boards is non-zero */
  596. error("NodeNames=%s SocketsPerBoards=0 is invalid, "
  597. "reset to 1", n->nodenames);
  598. sockets_per_board = 1;
  599. }
  600. if (no_boards) {
  601. /* This case is exactly as if Boards was not specified;
  602. * SocketsPerBoard=# can still be used,
  603. * but it cannot be combined with Sockets=# */
  604. n->boards = 1;
  605. if (!no_sockets && !no_sockets_per_board) {
  606. error("NodeNames=%s Sockets=# and "
  607. "SocketsPerBoard=# is invalid"
  608. ", using SocketsPerBoard",
  609. n->nodenames);
  610. n->sockets = sockets_per_board;
  611. }
  612. if (!no_sockets_per_board) {
  613. n->sockets = sockets_per_board;
  614. }
  615. if (!no_cpus && /* infer missing Sockets= */
  616. no_sockets) {
  617. n->sockets = n->cpus / (n->cores * n->threads);
  618. }
  619. if (n->sockets == 0) { /* make sure sockets != 0 */
  620. error("NodeNames=%s Sockets=0 is invalid, "
  621. "reset to 1", n->nodenames);
  622. n->sockets = 1;
  623. }
  624. if (no_cpus) { /* infer missing CPUs= */
  625. n->cpus = n->sockets * n->cores * n->threads;
  626. }
  627. /* if only CPUs= and Sockets=
  628. * specified check for match */
  629. if (!no_cpus && !no_sockets &&
  630. no_cores && no_threads &&
  631. (n->cpus != n->sockets)) {
  632. n->sockets = n->cpus;
  633. error("NodeNames=%s CPUs doesn't match "
  634. "Sockets, setting Sockets to %d",
  635. n->nodenames, n->sockets);
  636. }
  637. computed_procs = n->sockets * n->cores * n->threads;
  638. if ((n->cpus != n->sockets) &&
  639. (n->cpus != n->sockets * n->cores) &&
  640. (n->cpus != computed_procs)) {
  641. error("NodeNames=%s CPUs=%d doesn't match "
  642. "Sockets*CoresPerSocket*ThreadsPerCore "
  643. "(%d), resetting CPUs",
  644. n->nodenames, n->cpus, computed_procs);
  645. n->cpus = computed_procs;
  646. }
  647. } else {
  648. /* In this case Boards=# is used.
  649. * CPUs=# or Procs=# are ignored.
  650. */
  651. if (!no_cpus) {
  652. error("NodeNames=%s CPUs=# or Procs=# "
  653. "with Boards=# is invalid and "
  654. "is ignored.", n->nodenames);
  655. }
  656. if (n->boards == 0) {
  657. /* make sure boards is non-zero */
  658. error("NodeNames=%s Boards=0 is "
  659. "invalid, reset to 1",
  660. n->nodenames);
  661. n->boards = 1;
  662. }
  663. if (!no_sockets && !no_sockets_per_board) {
  664. error("NodeNames=%s Sockets=# and "
  665. "SocketsPerBoard=# is invalid, "
  666. "using SocketsPerBoard", n->nodenames);
  667. n->sockets = n->boards * sockets_per_board;
  668. } else if (!no_sockets_per_board) {
  669. n->sockets = n->boards * sockets_per_board;
  670. } else if (!no_sockets) {
  671. error("NodeNames=%s Sockets=# with Boards=# is"
  672. " not recommended, assume "
  673. "SocketsPerBoard was meant",
  674. n->nodenames);
  675. if (n->sockets == 0) {
  676. /* make sure sockets is non-zero */
  677. error("NodeNames=%s Sockets=0 is "
  678. "invalid, reset to 1",
  679. n->nodenames);
  680. n->sockets = 1;
  681. }
  682. n->sockets = n->boards * n->sockets;
  683. } else {
  684. n->sockets = n->boards;
  685. }
  686. /* Node boards factored into sockets */
  687. n->cpus = n->sockets * n->cores * n->threads;
  688. }
  689. *dest = (void *)n;
  690. return 1;
  691. }
  692. /* should not get here */
  693. }
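
A worked example of the inference in the no-Boards branch may help (editor's illustration; the node line is hypothetical, not taken from this repository):

/*
 * Example:
 *
 *   NodeName=tux[1-8] CPUs=16 CoresPerSocket=4 ThreadsPerCore=2
 *
 * Boards and Sockets are omitted, so Boards defaults to 1 and Sockets is
 * inferred as CPUs / (CoresPerSocket * ThreadsPerCore) = 16 / (4 * 2) = 2.
 * Since CPUs already equals Sockets * CoresPerSocket * ThreadsPerCore, the
 * supplied CPUs=16 is kept as-is.
 */
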
  694. /* Destroy a front_end record built by slurm_conf_frontend_array() */
  695. extern void destroy_frontend(void *ptr)
  696. {
  697. slurm_conf_frontend_t *n = (slurm_conf_frontend_t *) ptr;
  698. xfree(n->frontends);
  699. xfree(n->addresses);
  700. xfree(n->reason);
  701. xfree(ptr);
  702. }
  703. /*
  704. * list_find_frontend - find an entry in the front_end list, see list.h for
  705. * documentation
  706. * IN key - frontend name string or NULL to match any entry
  707. * RET 1 if found, 0 otherwise
  708. */
  709. extern int list_find_frontend (void *front_end_entry, void *key)
  710. {
  711. slurm_conf_frontend_t *front_end_ptr;
  712. if (key == NULL)
  713. return 1;
  714. front_end_ptr = (slurm_conf_frontend_t *) front_end_entry;
  715. if (strcmp(front_end_ptr->frontends, (char *) key) == 0)
  716. return 1;
  717. return 0;
  718. }
  719. static void _destroy_nodename(void *ptr)
  720. {
  721. slurm_conf_node_t *n = (slurm_conf_node_t *)ptr;
  722. xfree(n->addresses);
  723. xfree(n->feature);
  724. xfree(n->hostnames);
  725. xfree(n->gres);
  726. xfree(n->nodenames);
  727. xfree(n->port_str);
  728. xfree(n->reason);
  729. xfree(n->state);
  730. xfree(ptr);
  731. }
  732. int slurm_conf_frontend_array(slurm_conf_frontend_t **ptr_array[])
  733. {
  734. int count;
  735. slurm_conf_frontend_t **ptr;
  736. if (s_p_get_array((void ***)&ptr, &count, "FrontendName",
  737. conf_hashtbl)) {
  738. *ptr_array = ptr;
  739. return count;
  740. } else {
  741. #ifdef HAVE_FRONT_END
  742. /* No FrontendName in slurm.conf. Take the NodeAddr and
  743. * NodeHostName from the first node's record and use that to
  744. * build an equivalent structure to that constructed when
  745. * FrontendName is configured. This is intended for backward
  746. * compatibility with SLURM version 2.2. */
  747. static slurm_conf_frontend_t local_front_end;
  748. static slurm_conf_frontend_t *local_front_end_array[2] =
  749. {NULL, NULL};
  750. static char addresses[1024], hostnames[1024];
  751. if (local_front_end_array[0] == NULL) {
  752. slurm_conf_node_t **node_ptr;
  753. int node_count = 0;
  754. if (!s_p_get_array((void ***)&node_ptr, &node_count,
  755. "NodeName", conf_hashtbl) ||
  756. (node_count == 0))
  757. fatal("No front end nodes configured");
  758. strncpy(addresses, node_ptr[0]->addresses,
  759. sizeof(addresses));
  760. strncpy(hostnames, node_ptr[0]->hostnames,
  761. sizeof(hostnames));
  762. local_front_end.addresses = addresses;
  763. local_front_end.frontends = hostnames;
  764. if (node_ptr[0]->port_str) {
  765. local_front_end.port = atoi(node_ptr[0]->
  766. port_str);
  767. }
  768. local_front_end.reason = NULL;
  769. local_front_end.node_state = NODE_STATE_UNKNOWN;
  770. local_front_end_array[0] = &local_front_end;
  771. }
  772. *ptr_array = local_front_end_array;
  773. return 1;
  774. #else
  775. *ptr_array = NULL;
  776. return 0;
  777. #endif
  778. }
  779. }
  780. int slurm_conf_nodename_array(slurm_conf_node_t **ptr_array[])
  781. {
  782. int count;
  783. slurm_conf_node_t **ptr;
  784. if (s_p_get_array((void ***)&ptr, &count, "NodeName", conf_hashtbl)) {
  785. *ptr_array = ptr;
  786. return count;
  787. } else {
  788. *ptr_array = NULL;
  789. return 0;
  790. }
  791. }
  792. static int _parse_partitionname(void **dest, slurm_parser_enum_t type,
  793. const char *key, const char *value,
  794. const char *line, char **leftover)
  795. {
  796. s_p_hashtbl_t *tbl, *dflt;
  797. slurm_conf_partition_t *p;
  798. char *tmp = NULL;
  799. static s_p_options_t _partition_options[] = {
  800. {"AllocNodes", S_P_STRING},
  801. {"AllowGroups", S_P_STRING},
  802. {"Alternate", S_P_STRING},
  803. {"DefMemPerCPU", S_P_UINT32},
  804. {"DefMemPerNode", S_P_UINT32},
  805. {"Default", S_P_BOOLEAN}, /* YES or NO */
  806. {"DefaultTime", S_P_STRING},
  807. {"DisableRootJobs", S_P_BOOLEAN}, /* YES or NO */
  808. {"GraceTime", S_P_UINT32},
  809. {"Hidden", S_P_BOOLEAN}, /* YES or NO */
  810. {"MaxMemPerCPU", S_P_UINT32},
  811. {"MaxMemPerNode", S_P_UINT32},
  812. {"MaxTime", S_P_STRING},
  813. {"MaxNodes", S_P_UINT32}, /* INFINITE or a number */
  814. {"MinNodes", S_P_UINT32},
  815. {"Nodes", S_P_STRING},
  816. {"PreemptMode", S_P_STRING},
  817. {"Priority", S_P_UINT16},
  818. {"RootOnly", S_P_BOOLEAN}, /* YES or NO */
  819. {"ReqResv", S_P_BOOLEAN}, /* YES or NO */
  820. {"Shared", S_P_STRING}, /* YES, NO, or FORCE */
  821. {"State", S_P_STRING}, /* UP, DOWN, INACTIVE or DRAIN */
  822. {NULL}
  823. };
  824. tbl = s_p_hashtbl_create(_partition_options);
  825. s_p_parse_line(tbl, *leftover, leftover);
  826. /* s_p_dump_values(tbl, _partition_options); */
  827. if (strcasecmp(value, "DEFAULT") == 0) {
  828. if (default_partition_tbl != NULL) {
  829. s_p_hashtbl_merge(tbl, default_partition_tbl);
  830. s_p_hashtbl_destroy(default_partition_tbl);
  831. }
  832. default_partition_tbl = tbl;
  833. return 0;
  834. } else {
  835. p = xmalloc(sizeof(slurm_conf_partition_t));
  836. dflt = default_partition_tbl;
  837. p->name = xstrdup(value);
  838. if (!s_p_get_string(&p->allow_groups, "AllowGroups", tbl))
  839. s_p_get_string(&p->allow_groups, "AllowGroups", dflt);
  840. if (p->allow_groups && strcasecmp(p->allow_groups, "ALL")==0) {
  841. xfree(p->allow_groups);
  842. p->allow_groups = NULL; /* NULL means allow all */
  843. }
  844. if (!s_p_get_string(&p->allow_alloc_nodes, "AllocNodes", tbl)) {
  845. s_p_get_string(&p->allow_alloc_nodes, "AllocNodes",
  846. dflt);
  847. if (p->allow_alloc_nodes &&
  848. (strcasecmp(p->allow_alloc_nodes, "ALL") == 0)) {
  849. /* NULL means to allow all submit nodes */
  850. xfree(p->allow_alloc_nodes);
  851. }
  852. }
  853. if (!s_p_get_string(&p->alternate, "Alternate", tbl))
  854. s_p_get_string(&p->alternate, "Alternate", dflt);
  855. if (!s_p_get_boolean(&p->default_flag, "Default", tbl)
  856. && !s_p_get_boolean(&p->default_flag, "Default", dflt))
  857. p->default_flag = false;
  858. if (!s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerNode",
  859. tbl) &&
  860. !s_p_get_uint32(&p->def_mem_per_cpu, "DefMemPerNode",
  861. dflt)) {
  862. if (s_p_get_uint32(&p->def_mem_per_cpu,
  863. "DefMemPerCPU", tbl) ||
  864. s_p_get_uint32(&p->def_mem_per_cpu,
  865. "DefMemPerCPU", dflt)) {
  866. p->def_mem_per_cpu |= MEM_PER_CPU;
  867. } else {
  868. p->def_mem_per_cpu = 0;
  869. }
  870. }
  871. if (!s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerNode",
  872. tbl) &&
  873. !s_p_get_uint32(&p->max_mem_per_cpu, "MaxMemPerNode",
  874. dflt)) {
  875. if (s_p_get_uint32(&p->max_mem_per_cpu,
  876. "MaxMemPerCPU", tbl) ||
  877. s_p_get_uint32(&p->max_mem_per_cpu,
  878. "MaxMemPerCPU", dflt)) {
  879. p->max_mem_per_cpu |= MEM_PER_CPU;
  880. } else {
  881. p->max_mem_per_cpu = 0;
  882. }
  883. }
  884. if (!s_p_get_boolean((bool *)&p->disable_root_jobs,
  885. "DisableRootJobs", tbl))
  886. p->disable_root_jobs = (uint16_t)NO_VAL;
  887. if (!s_p_get_boolean(&p->hidden_flag, "Hidden", tbl)
  888. && !s_p_get_boolean(&p->hidden_flag, "Hidden", dflt))
  889. p->hidden_flag = false;
  890. if (!s_p_get_string(&tmp, "MaxTime", tbl) &&
  891. !s_p_get_string(&tmp, "MaxTime", dflt))
  892. p->max_time = INFINITE;
  893. else {
  894. int max_time = time_str2mins(tmp);
  895. if ((max_time < 0) && (max_time != INFINITE)) {
  896. error("Bad value \"%s\" for MaxTime", tmp);
  897. _destroy_partitionname(p);
  898. s_p_hashtbl_destroy(tbl);
  899. xfree(tmp);
  900. return -1;
  901. }
  902. p->max_time = max_time;
  903. xfree(tmp);
  904. }
  905. if (!s_p_get_uint32(&p->grace_time, "GraceTime", tbl) &&
  906. !s_p_get_uint32(&p->grace_time, "GraceTime", dflt))
  907. p->grace_time = 0;
  908. if (!s_p_get_string(&tmp, "DefaultTime", tbl) &&
  909. !s_p_get_string(&tmp, "DefaultTime", dflt))
  910. p->default_time = NO_VAL;
  911. else {
  912. int default_time = time_str2mins(tmp);
  913. if ((default_time < 0) && (default_time != INFINITE)) {
  914. error("Bad value \"%s\" for DefaultTime", tmp);
  915. _destroy_partitionname(p);
  916. s_p_hashtbl_destroy(tbl);
  917. xfree(tmp);
  918. return -1;
  919. }
  920. p->default_time = default_time;
  921. xfree(tmp);
  922. }
  923. if (!s_p_get_uint32(&p->max_nodes, "MaxNodes", tbl)
  924. && !s_p_get_uint32(&p->max_nodes, "MaxNodes", dflt))
  925. p->max_nodes = INFINITE;
  926. if (!s_p_get_uint32(&p->min_nodes, "MinNodes", tbl)
  927. && !s_p_get_uint32(&p->min_nodes, "MinNodes", dflt))
  928. p->min_nodes = 1;
  929. if (!s_p_get_string(&p->nodes, "Nodes", tbl)
  930. && !s_p_get_string(&p->nodes, "Nodes", dflt))
  931. p->nodes = NULL;
  932. else {
  933. int i;
  934. for (i=0; p->nodes[i]; i++) {
  935. if (isspace((int)p->nodes[i]))
  936. p->nodes[i] = ',';
  937. }
  938. }
  939. if (!s_p_get_boolean(&p->root_only_flag, "RootOnly", tbl)
  940. && !s_p_get_boolean(&p->root_only_flag, "RootOnly", dflt))
  941. p->root_only_flag = false;
  942. if (!s_p_get_boolean(&p->req_resv_flag, "ReqResv", tbl)
  943. && !s_p_get_boolean(&p->req_resv_flag, "ReqResv", dflt))
  944. p->req_resv_flag = false;
  945. if (s_p_get_string(&tmp, "PreemptMode", tbl) ||
  946. s_p_get_string(&tmp, "PreemptMode", dflt)) {
  947. p->preempt_mode = preempt_mode_num(tmp);
  948. if (p->preempt_mode == (uint16_t) NO_VAL) {
  949. error("Bad value \"%s\" for PreemptMode", tmp);
  950. xfree(tmp);
  951. return -1;
  952. }
  953. xfree(tmp);
  954. } else
  955. p->preempt_mode = (uint16_t) NO_VAL;
  956. if (!s_p_get_uint16(&p->priority, "Priority", tbl) &&
  957. !s_p_get_uint16(&p->priority, "Priority", dflt))
  958. p->priority = 1;
  959. if (s_p_get_string(&tmp, "Shared", tbl) ||
  960. s_p_get_string(&tmp, "Shared", dflt)) {
  961. if (strcasecmp(tmp, "NO") == 0)
  962. p->max_share = 1;
  963. #ifndef HAVE_XCPU
  964. /* Only "Shared=NO" is valid on XCPU systems */
  965. else if (strcasecmp(tmp, "EXCLUSIVE") == 0)
  966. p->max_share = 0;
  967. else if (strncasecmp(tmp, "YES:", 4) == 0) {
  968. int i = strtol(&tmp[4], (char **) NULL, 10);
  969. if (i <= 1) {
  970. error("Ignoring bad Shared value: %s",
  971. tmp);
  972. p->max_share = 1; /* Shared=NO */
  973. } else
  974. p->max_share = i;
  975. } else if (strcasecmp(tmp, "YES") == 0)
  976. p->max_share = 4;
  977. else if (strncasecmp(tmp, "FORCE:", 6) == 0) {
  978. int i = strtol(&tmp[6], (char **) NULL, 10);
  979. if (i < 1) {
  980. error("Ignoring bad Shared value: %s",
  981. tmp);
  982. p->max_share = 1; /* Shared=NO */
  983. } else
  984. p->max_share = i | SHARED_FORCE;
  985. } else if (strcasecmp(tmp, "FORCE") == 0)
  986. p->max_share = 4 | SHARED_FORCE;
  987. #endif
  988. else {
  989. error("Bad value \"%s\" for Shared", tmp);
  990. _destroy_partitionname(p);
  991. s_p_hashtbl_destroy(tbl);
  992. xfree(tmp);
  993. return -1;
  994. }
  995. xfree(tmp);
  996. } else
  997. p->max_share = 1;
  998. if (s_p_get_string(&tmp, "State", tbl) ||
  999. s_p_get_string(&tmp, "State", dflt)) {
  1000. if (strncasecmp(tmp, "DOWN", 4) == 0)
  1001. p->state_up = PARTITION_DOWN;
  1002. else if (strncasecmp(tmp, "UP", 2) == 0)
  1003. p->state_up = PARTITION_UP;
  1004. else if (strncasecmp(tmp, "DRAIN", 5) == 0)
  1005. p->state_up = PARTITION_DRAIN;
  1006. else if (strncasecmp(tmp, "INACTIVE", 8) == 0)
  1007. p->state_up = PARTITION_INACTIVE;
  1008. else {
  1009. error("Bad value \"%s\" for State", tmp);
  1010. _destroy_partitionname(p);
  1011. s_p_hashtbl_destroy(tbl);
  1012. xfree(tmp);
  1013. return -1;
  1014. }
  1015. xfree(tmp);
  1016. } else
  1017. p->state_up = PARTITION_UP;
  1018. s_p_hashtbl_destroy(tbl);
  1019. *dest = (void *)p;
  1020. return 1;
  1021. }
  1022. /* should not get here */
  1023. }
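
The effect of the Shared=, MaxTime=, and per-CPU memory parsing above is easiest to see on a concrete line; the following is an editor's example with invented values, not configuration taken from this repository.

/*
 * Example partition line (hypothetical):
 *
 *   PartitionName=batch Nodes=tux[1-8] MaxTime=24:00:00 Shared=FORCE:2 DefMemPerCPU=1024
 *
 * With the parsing above: max_time becomes 1440 minutes, max_share becomes
 * (2 | SHARED_FORCE), and def_mem_per_cpu becomes (1024 | MEM_PER_CPU), the
 * flag that distinguishes the per-CPU value from DefMemPerNode.
 */
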
  1024. static void _destroy_partitionname(void *ptr)
  1025. {
  1026. slurm_conf_partition_t *p = (slurm_conf_partition_t *)ptr;
  1027. xfree(p->allow_alloc_nodes);
  1028. xfree(p->allow_groups);
  1029. xfree(p->alternate);
  1030. xfree(p->name);
  1031. xfree(p->nodes);
  1032. xfree(ptr);
  1033. }
  1034. int slurm_conf_partition_array(slurm_conf_partition_t **ptr_array[])
  1035. {
  1036. int count;
  1037. slurm_conf_partition_t **ptr;
  1038. if (s_p_get_array((void ***)&ptr, &count, "PartitionName",
  1039. conf_hashtbl)) {
  1040. *ptr_array = ptr;
  1041. return count;
  1042. } else {
  1043. *ptr_array = NULL;
  1044. return 0;
  1045. }
  1046. }
  1047. static int _parse_downnodes(void **dest, slurm_parser_enum_t type,
  1048. const char *key, const char *value,
  1049. const char *line, char **leftover)
  1050. {
  1051. s_p_hashtbl_t *tbl;
  1052. slurm_conf_downnodes_t *n;
  1053. static s_p_options_t _downnodes_options[] = {
  1054. {"Reason", S_P_STRING},
  1055. {"State", S_P_STRING},
  1056. {NULL}
  1057. };
  1058. tbl = s_p_hashtbl_create(_downnodes_options);
  1059. s_p_parse_line(tbl, *leftover, leftover);
  1060. /* s_p_dump_values(tbl, _downnodes_options); */
  1061. n = xmalloc(sizeof(slurm_conf_node_t));
  1062. n->nodenames = xstrdup(value);
  1063. if (!s_p_get_string(&n->reason, "Reason", tbl))
  1064. n->reason = xstrdup("Set in slurm.conf");
  1065. if (!s_p_get_string(&n->state, "State", tbl))
  1066. n->state = NULL;
  1067. s_p_hashtbl_destroy(tbl);
  1068. *dest = (void *)n;
  1069. return 1;
  1070. }
  1071. static void _destroy_downnodes(void *ptr)
  1072. {
  1073. slurm_conf_downnodes_t *n = (slurm_conf_downnodes_t *)ptr;
  1074. xfree(n->nodenames);
  1075. xfree(n->reason);
  1076. xfree(n->state);
  1077. xfree(ptr);
  1078. }
  1079. extern int slurm_conf_downnodes_array(slurm_conf_downnodes_t **ptr_array[])
  1080. {
  1081. int count;
  1082. slurm_conf_downnodes_t **ptr;
  1083. if (s_p_get_array((void ***)&ptr, &count, "DownNodes", conf_hashtbl)) {
  1084. *ptr_array = ptr;
  1085. return count;
  1086. } else {
  1087. *ptr_array = NULL;
  1088. return 0;
  1089. }
  1090. }
  1091. static void _free_name_hashtbl(void)
  1092. {
  1093. int i;
  1094. names_ll_t *p, *q;
  1095. for (i=0; i<NAME_HASH_LEN; i++) {
  1096. p = node_to_host_hashtbl[i];
  1097. while (p) {
  1098. xfree(p->alias);
  1099. xfree(p->hostname);
  1100. xfree(p->address);
  1101. q = p->next_alias;
  1102. xfree(p);
  1103. p = q;
  1104. }
  1105. node_to_host_hashtbl[i] = NULL;
  1106. host_to_node_hashtbl[i] = NULL;
  1107. }
  1108. nodehash_initialized = false;
  1109. }
  1110. static void _init_name_hashtbl(void)
  1111. {
  1112. return;
  1113. }
  1114. static int _get_hash_idx(const char *name)
  1115. {
  1116. int index = 0;
  1117. int j;
  1118. if (name == NULL)
  1119. return 0; /* degenerate case */
  1120. /* Multiply each character by its numerical position in the
  1121. * name string to add a bit of entropy, because host names such
  1122. * as cluster[0001-1000] can cause excessive index collisions.
  1123. */
  1124. for (j = 1; *name; name++, j++)
  1125. index += (int)*name * j;
  1126. index %= NAME_HASH_LEN;
  1127. if (index < 0)
  1128. index += NAME_HASH_LEN;
  1129. return index;
  1130. }
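
A small worked example of the position-weighted hash (editor's illustration, with a hypothetical node name):

/*
 * For the name "tux1": 't'*1 + 'u'*2 + 'x'*3 + '1'*4
 *                    = 116 + 234 + 360 + 196 = 906, and 906 % 512 = 394.
 * The positional weight keeps names built from the same characters in a
 * different order (e.g. tux0001 vs. tux0010) from hashing to the same bucket.
 */
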
  1131. static void _push_to_hashtbls(char *alias, char *hostname,
  1132. char *address, uint16_t port,
  1133. uint16_t cpus, uint16_t boards,
  1134. uint16_t sockets, uint16_t cores,
  1135. uint16_t threads, bool front_end)
  1136. {
  1137. int hostname_idx, alias_idx;
  1138. names_ll_t *p, *new;
  1139. alias_idx = _get_hash_idx(alias);
  1140. hostname_idx = _get_hash_idx(hostname);
  1141. #if !defined(HAVE_FRONT_END) && !defined(MULTIPLE_SLURMD)
  1142. /* Ensure only one slurmd configured on each host */
  1143. p = host_to_node_hashtbl[hostname_idx];
  1144. while (p) {
  1145. if (strcmp(p->hostname, hostname) == 0) {
  1146. error("Duplicated NodeHostName %s in the config file",
  1147. hostname);
  1148. return;
  1149. }
  1150. p = p->next_hostname;
  1151. }
  1152. #endif
  1153. /* Ensure only one instance of each NodeName */
  1154. p = node_to_host_hashtbl[alias_idx];
  1155. while (p) {
  1156. if (strcmp(p->alias, alias)==0) {
  1157. if (front_end)
  1158. fatal("Frontend not configured correctly "
  1159. "in slurm.conf. See man slurm.conf "
  1160. "look for frontendname.");
  1161. fatal("Duplicated NodeName %s in the config file",
  1162. p->alias);
  1163. return;
  1164. }
  1165. p = p->next_alias;
  1166. }
  1167. /* Create the new data structure and link it into the hash tables */
  1168. new = (names_ll_t *)xmalloc(sizeof(names_ll_t));
  1169. new->alias = xstrdup(alias);
  1170. new->hostname = xstrdup(hostname);
  1171. new->address = xstrdup(address);
  1172. new->port = port;
  1173. new->cpus = cpus;
  1174. new->boards = boards;
  1175. new->sockets = sockets;
  1176. new->cores = cores;
  1177. new->threads = threads;
  1178. new->addr_initialized = false;
  1179. /* Put on end of each list */
  1180. new->next_alias = NULL;
  1181. if (node_to_host_hashtbl[alias_idx]) {
  1182. p = node_to_host_hashtbl[alias_idx];
  1183. while (p->next_alias)
  1184. p = p->next_alias;
  1185. p->next_alias = new;
  1186. } else {
  1187. node_to_host_hashtbl[alias_idx] = new;
  1188. }
  1189. new->next_hostname = NULL;
  1190. if (host_to_node_hashtbl[hostname_idx]) {
  1191. p = host_to_node_hashtbl[hostname_idx];
  1192. while (p->next_hostname)
  1193. p = p->next_hostname;
  1194. p->next_hostname = new;
  1195. } else {
  1196. host_to_node_hashtbl[hostname_idx] = new;
  1197. }
  1198. }
  1199. /*
  1200. * Register the given NodeName in the alias table.
  1201. * If node_hostname is NULL, only node_name will be used and
  1202. * no lookup table record is created.
  1203. */
  1204. static int _register_conf_node_aliases(slurm_conf_node_t *node_ptr)
  1205. {
  1206. hostlist_t address_list = NULL;
  1207. hostlist_t alias_list = NULL;
  1208. hostlist_t hostname_list = NULL;
  1209. hostlist_t port_list = NULL;
  1210. char *address = NULL;
  1211. char *alias = NULL;
  1212. char *hostname = NULL;
  1213. char *port_str = NULL;
  1214. int error_code = SLURM_SUCCESS;
  1215. int address_count, alias_count, hostname_count, port_count, port_int;
  1216. uint16_t port = 0;
  1217. if ((node_ptr->nodenames == NULL) || (node_ptr->nodenames[0] == '\0'))
  1218. return -1;
  1219. if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) {
  1220. error("Unable to create NodeAddr list from %s",
  1221. node_ptr->addresses);
  1222. error_code = errno;
  1223. goto cleanup;
  1224. }
  1225. if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) {
  1226. error("Unable to create NodeName list from %s",
  1227. node_ptr->nodenames);
  1228. error_code = errno;
  1229. goto cleanup;
  1230. }
  1231. if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) {
  1232. error("Unable to create NodeHostname list from %s",
  1233. node_ptr->hostnames);
  1234. error_code = errno;
  1235. goto cleanup;
  1236. }
  1237. if (node_ptr->port_str && node_ptr->port_str[0] &&
  1238. (node_ptr->port_str[0] != '[') &&
  1239. (strchr(node_ptr->port_str, '-') ||
  1240. strchr(node_ptr->port_str, ','))) {
  1241. xstrfmtcat(port_str, "[%s]", node_ptr->port_str);
  1242. port_list = hostlist_create(port_str);
  1243. xfree(port_str);
  1244. } else {
  1245. port_list = hostlist_create(node_ptr->port_str);
  1246. }
  1247. if (port_list == NULL) {
  1248. error("Unable to create Port list from %s",
  1249. node_ptr->port_str);
  1250. error_code = errno;
  1251. goto cleanup;
  1252. }
  1253. #if (SYSTEM_DIMENSIONS > 1)
  1254. if (conf_ptr->node_prefix == NULL)
  1255. _set_node_prefix(node_ptr->nodenames);
  1256. #endif
  1257. /* some sanity checks */
  1258. address_count = hostlist_count(address_list);
  1259. alias_count = hostlist_count(alias_list);
  1260. hostname_count = hostlist_count(hostname_list);
  1261. port_count = hostlist_count(port_list);
  1262. #ifdef HAVE_FRONT_END
  1263. if ((address_count != alias_count) && (address_count != 1)) {
  1264. error("NodeAddr count must equal that of NodeName "
  1265. "records of there must be no more than one");
  1266. goto cleanup;
  1267. }
  1268. if ((hostname_count != alias_count) && (hostname_count != 1)) {
  1269. error("NodeHostname count must equal that of NodeName "
  1270. "records of there must be no more than one");
  1271. goto cleanup;
  1272. }
  1273. #else
  1274. #ifdef MULTIPLE_SLURMD
  1275. if ((address_count != alias_count) && (address_count != 1)) {
  1276. error("NodeAddr count must equal that of NodeName "
  1277. "records of there must be no more than one");
  1278. goto cleanup;
  1279. }
  1280. #else
  1281. if (address_count < alias_count) {
  1282. error("At least as many NodeAddr are required as NodeName");
  1283. goto cleanup;
  1284. }
  1285. if (hostname_count < alias_count) {
  1286. error("At least as many NodeHostname are required "
  1287. "as NodeName");
  1288. goto cleanup;
  1289. }
  1290. #endif /* MULTIPLE_SLURMD */
  1291. #endif /* HAVE_FRONT_END */
  1292. if ((port_count != alias_count) && (port_count > 1)) {
  1293. error("Port count must equal that of NodeName "
  1294. "records or there must be no more than one");
  1295. goto cleanup;
  1296. }
  1297. /* now build the individual node structures */
  1298. while ((alias = hostlist_shift(alias_list))) {
  1299. if (address_count > 0) {
  1300. address_count--;
  1301. if (address)
  1302. free(address);
  1303. address = hostlist_shift(address_list);
  1304. }
  1305. if (hostname_count > 0) {
  1306. hostname_count--;
  1307. if (hostname)
  1308. free(hostname);
  1309. hostname = hostlist_shift(hostname_list);
  1310. }
  1311. if (port_count > 0) {
  1312. port_count--;
  1313. if (port_str)
  1314. free(port_str);
  1315. port_str = hostlist_shift(port_list);
  1316. port_int = atoi(port_str);
  1317. if ((port_int <= 0) || (port_int > 0xffff))
  1318. fatal("Invalid Port %s", node_ptr->port_str);
  1319. port = port_int;
  1320. }
  1321. _push_to_hashtbls(alias, hostname, address, port,
  1322. node_ptr->cpus, node_ptr->boards,
  1323. node_ptr->sockets, node_ptr->cores,
  1324. node_ptr->threads, 0);
  1325. free(alias);
  1326. }
  1327. if (address)
  1328. free(address);
  1329. if (hostname)
  1330. free(hostname);
  1331. if (port_str)
  1332. free(port_str);
  1333. /* free allocated storage */
  1334. cleanup:
  1335. if (address_list)
  1336. hostlist_destroy(address_list);
  1337. if (alias_list)
  1338. hostlist_destroy(alias_list);
  1339. if (hostname_list)
  1340. hostlist_destroy(hostname_list);
  1341. if (port_list)
  1342. hostlist_destroy(port_list);
  1343. return error_code;
  1344. }
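
As a hedged illustration of how the hostlists above are paired (hypothetical names, editor's example):

/*
 * A line such as
 *
 *   NodeName=tux[0-2] NodeHostname=host[0-2] NodeAddr=tux-ib[0-2] Port=7000
 *
 * expands into three equal-length hostlists plus a single port, and the
 * loop above registers the triples (tux0, host0, tux-ib0), (tux1, host1,
 * tux-ib1), (tux2, host2, tux-ib2), each with port 7000, in the node and
 * host hash tables.
 */
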
  1345. static int _register_front_ends(slurm_conf_frontend_t *front_end_ptr)
  1346. {
  1347. hostlist_t hostname_list = NULL;
  1348. hostlist_t address_list = NULL;
  1349. char *hostname = NULL;
  1350. char *address = NULL;
  1351. int error_code = SLURM_SUCCESS;
  1352. if ((front_end_ptr->frontends == NULL) ||
  1353. (front_end_ptr->frontends[0] == '\0'))
  1354. return -1;
  1355. if ((hostname_list = hostlist_create(front_end_ptr->frontends))
  1356. == NULL) {
  1357. error("Unable to create FrontendNames list from %s",
  1358. front_end_ptr->frontends);
  1359. error_code = errno;
  1360. goto cleanup;
  1361. }
  1362. if ((address_list = hostlist_create(front_end_ptr->addresses))
  1363. == NULL) {
  1364. error("Unable to create FrontendAddr list from %s",
  1365. front_end_ptr->addresses);
  1366. error_code = errno;
  1367. goto cleanup;
  1368. }
  1369. if (hostlist_count(address_list) != hostlist_count(hostname_list)) {
  1370. error("Node count mismatch between FrontendNames and "
  1371. "FrontendAddr");
  1372. goto cleanup;
  1373. }
  1374. while ((hostname = hostlist_shift(hostname_list))) {
  1375. address = hostlist_shift(address_list);
  1376. _push_to_hashtbls(hostname, hostname, address,
  1377. front_end_ptr->port, 1, 1, 1, 1, 1, 1);
  1378. free(hostname);
  1379. free(address);
  1380. }
  1381. /* free allocated storage */
  1382. cleanup:
  1383. if (hostname_list)
  1384. hostlist_destroy(hostname_list);
  1385. if (address_list)
  1386. hostlist_destroy(address_list);
  1387. return error_code;
  1388. }
  1389. static void _init_slurmd_nodehash(void)
  1390. {
  1391. slurm_conf_node_t **ptr_array;
  1392. slurm_conf_frontend_t **ptr_front_end;
  1393. int count, i;
  1394. if (nodehash_initialized)
  1395. return;
  1396. else
  1397. nodehash_initialized = true;
  1398. if (!conf_initialized) {
  1399. _init_slurm_conf(NULL);
  1400. conf_initialized = true;
  1401. }
  1402. count = slurm_conf_nodename_array(&ptr_array);
  1403. for (i = 0; i < count; i++)
  1404. _register_conf_node_aliases(ptr_array[i]);
  1405. count = slurm_conf_frontend_array(&ptr_front_end);
  1406. for (i = 0; i < count; i++)
  1407. _register_front_ends(ptr_front_end[i]);
  1408. }
  1409. /*
  1410. * Caller needs to call slurm_conf_lock() and hold the lock before
  1411. * calling this function (and call slurm_conf_unlock() afterwards).
  1412. */
  1413. static char *_internal_get_hostname(const char *node_name)
  1414. {
  1415. int idx;
  1416. names_ll_t *p;
  1417. _init_slurmd_nodehash();
  1418. idx = _get_hash_idx(node_name);
  1419. p = node_to_host_hashtbl[idx];
  1420. while (p) {
  1421. if (strcmp(p->alias, node_name) == 0) {
  1422. return xstrdup(p->hostname);
  1423. }
  1424. p = p->next_alias;
  1425. }
  1426. return NULL;
  1427. }
  1428. /*
  1429. * slurm_conf_get_hostname - Return the NodeHostname for given NodeName
  1430. */
  1431. extern char *slurm_conf_get_hostname(const char *node_name)
  1432. {
  1433. char *hostname = NULL;
  1434. slurm_conf_lock();
  1435. hostname = _internal_get_hostname(node_name);
  1436. slurm_conf_unlock();
  1437. return hostname;
  1438. }
  1439. /*
  1440. * slurm_conf_get_nodename - Return the NodeName for given NodeHostname
  1441. *
  1442. * NOTE: Call xfree() to release returned value's memory.
  1443. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1444. */
  1445. extern char *slurm_conf_get_nodename(const char *node_hostname)
  1446. {
  1447. char *alias = NULL;
  1448. int idx;
  1449. names_ll_t *p;
  1450. #ifdef HAVE_FRONT_END
  1451. slurm_conf_frontend_t *front_end_ptr = NULL;
  1452. slurm_conf_lock();
  1453. if (!front_end_list) {
  1454. debug("front_end_list is NULL");
  1455. } else {
  1456. front_end_ptr = list_find_first(front_end_list,
  1457. list_find_frontend,
  1458. (char *) node_hostname);
  1459. if (front_end_ptr) {
  1460. alias = xstrdup(front_end_ptr->frontends);
  1461. slurm_conf_unlock();
  1462. return alias;
  1463. }
  1464. }
  1465. #else
  1466. slurm_conf_lock();
  1467. #endif
  1468. _init_slurmd_nodehash();
  1469. idx = _get_hash_idx(node_hostname);
  1470. p = host_to_node_hashtbl[idx];
  1471. while (p) {
  1472. if (strcmp(p->hostname, node_hostname) == 0) {
  1473. alias = xstrdup(p->alias);
  1474. break;
  1475. }
  1476. p = p->next_hostname;
  1477. }
  1478. slurm_conf_unlock();
  1479. return alias;
  1480. }
  1481. /*
  1482. * slurm_conf_get_aliases - Return all NodeName values
  1483. * associated with a given NodeHostname (useful with multiple slurmd,
  1484. * to get the list of virtual nodes associated with a real node)
  1485. *
  1486. * NOTE: Call xfree() to release returned value's memory.
  1487. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1488. */
  1489. extern char *slurm_conf_get_aliases(const char *node_hostname)
  1490. {
  1491. int idx;
  1492. names_ll_t *p;
  1493. char *aliases = NULL;
  1494. char *s = NULL;
  1495. slurm_conf_lock();
  1496. _init_slurmd_nodehash();
  1497. idx = _get_hash_idx(node_hostname);
  1498. p = host_to_node_hashtbl[idx];
  1499. while (p) {
  1500. if (strcmp(p->hostname, node_hostname) == 0) {
  1501. if ( aliases == NULL )
  1502. aliases = xstrdup(p->alias);
  1503. else {
  1504. s = xstrdup_printf("%s %s",aliases,p->alias);
  1505. xfree(aliases);
  1506. aliases = s;
  1507. }
  1508. }
  1509. p = p->next_hostname;
  1510. }
  1511. slurm_conf_unlock();
  1512. return aliases;
  1513. }
  1514. /*
  1515. * slurm_conf_get_nodeaddr - Return the NodeAddr for given NodeHostname
  1516. *
  1517. * NOTE: Call xfree() to release returned value's memory.
  1518. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1519. */
  1520. extern char *slurm_conf_get_nodeaddr(const char *node_hostname)
  1521. {
  1522. int idx;
  1523. names_ll_t *p;
  1524. slurm_conf_lock();
  1525. _init_slurmd_nodehash();
  1526. idx = _get_hash_idx(node_hostname);
  1527. p = host_to_node_hashtbl[idx];
  1528. while (p) {
  1529. if (strcmp(p->hostname, node_hostname) == 0) {
  1530. char *nodeaddr;
  1531. if (p->address != NULL)
  1532. nodeaddr = xstrdup(p->address);
  1533. else
  1534. nodeaddr = NULL;
  1535. slurm_conf_unlock();
  1536. return nodeaddr;
  1537. }
  1538. p = p->next_hostname;
  1539. }
  1540. slurm_conf_unlock();
  1541. return NULL;
  1542. }
  1543. /*
  1544. * slurm_conf_get_nodename_from_addr - Return the NodeName for given NodeAddr
  1545. *
  1546. * NOTE: Call xfree() to release returned value's memory.
  1547. * NOTE: Caller must NOT be holding slurm_conf_lock().
  1548. */
  1549. extern char *slurm_conf_get_nodename_from_addr(const char *node_addr)
  1550. {
  1551. unsigned char buf[HOSTENT_SIZE];
  1552. struct hostent *hptr;
  1553. unsigned long addr = inet_addr(node_addr);
  1554. char *start_name, *ret_name = NULL, *dot_ptr;
  1555. if (!(hptr = get_host_by_addr((char *)&addr, sizeof(addr), AF_INET,
  1556. buf, sizeof(buf), NULL))) {
  1557. error("No node found with addr %s", node_addr);
  1558. return NULL;
  1559. }
  1560. if (!strcmp(hptr->h_name, "localhost")) {
  1561. start_name = xshort_hostname();
  1562. } else {
  1563. start_name = xstrdup(hptr->h_name);
  1564. dot_ptr = strchr(start_name, '.');
  1565. if (dot_ptr == NULL)
  1566. dot_ptr = start_name + strlen(start_name);
  1567. else
  1568. dot_ptr[0] = '\0';
  1569. }
  1570. ret_name = slurm_conf_get_aliases(start_name);
  1571. xfree(start_name);
  1572. return ret_name;
  1573. }
  1574. /*
  1575. * slurm_conf_get_aliased_nodename - Return the NodeName for the
  1576. * complete hostname string returned by gethostname if there is
  1577. * such a match, otherwise iterate through any aliases returned
  1578. * by get_host_by_name
  1579. */
  1580. extern char *slurm_conf_get_aliased_nodename()
  1581. {
  1582. char hostname_full[1024];
  1583. int error_code;
  1584. char *nodename;
  1585. error_code = gethostname(hostname_full, sizeof(hostname_full));
  1586. /* we shouldn't have any problem here since by the time
  1587. * this function has been called, gethostname_short,
  1588. * which invokes gethostname, has probably already been called
  1589. * successfully, so just return NULL if something weird
  1590. * happens at this point
  1591. */
  1592. if (error_code)
  1593. return NULL;
  1594. nodename = slurm_conf_get_nodename(hostname_full);
  1595. /* if the full hostname did not match a nodename */
  1596. if (nodename == NULL) {
  1597. /* use get_host_by_name; buffer sizes, semantics, etc.
  1598. * cop…
