PageRenderTime 85ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/src/common/gres.c

https://github.com/cfenoy/slurm
C | 4721 lines | 3817 code | 442 blank | 462 comment | 1109 complexity | 5c6779d3369a8bd29341bd3b1700a882 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * gres.c - driver for gres plugin
  3. *****************************************************************************
  4. * Copyright (C) 2010 Lawrence Livermore National Security.
  5. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  6. * Written by Morris Jette <jette1@llnl.gov>
  7. * CODE-OCEC-09-009. All rights reserved.
  8. *
  9. * This file is part of SLURM, a resource management program.
  10. * For details, see <http://www.schedmd.com/slurmdocs/>.
  11. * Please also read the included file: DISCLAIMER.
  12. *
  13. * SLURM is free software; you can redistribute it and/or modify it under
  14. * the terms of the GNU General Public License as published by the Free
  15. * Software Foundation; either version 2 of the License, or (at your option)
  16. * any later version.
  17. *
  18. * In addition, as a special exception, the copyright holders give permission
  19. * to link the code of portions of this program with the OpenSSL library under
  20. * certain conditions as described in each individual source file, and
  21. * distribute linked combinations including the two. You must obey the GNU
  22. * General Public License in all respects for all of the code used other than
  23. * OpenSSL. If you modify file(s) with this exception, you may extend this
  24. * exception to your version of the file(s), but you are not obligated to do
  25. * so. If you do not wish to do so, delete this exception statement from your
  26. * version. If you delete this exception statement from all source files in
  27. * the program, then also delete it here.
  28. *
  29. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  30. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  31. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  32. * details.
  33. *
  34. * You should have received a copy of the GNU General Public License along
  35. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  36. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  37. \*****************************************************************************/
  38. #if HAVE_CONFIG_H
  39. # include "config.h"
  40. # if STDC_HEADERS
  41. # include <string.h>
  42. # endif
  43. # if HAVE_SYS_TYPES_H
  44. # include <sys/types.h>
  45. # endif /* HAVE_SYS_TYPES_H */
  46. # if HAVE_UNISTD_H
  47. # include <unistd.h>
  48. # endif
  49. # if HAVE_INTTYPES_H
  50. # include <inttypes.h>
  51. # else /* ! HAVE_INTTYPES_H */
  52. # if HAVE_STDINT_H
  53. # include <stdint.h>
  54. # endif
  55. # endif /* HAVE_INTTYPES_H */
  56. # ifdef HAVE_LIMITS_H
  57. # include <limits.h>
  58. # endif
  59. #else /* ! HAVE_CONFIG_H */
  60. # include <limits.h>
  61. # include <sys/types.h>
  62. # include <stdint.h>
  63. # include <stdlib.h>
  64. # include <string.h>
  65. #endif /* HAVE_CONFIG_H */
  66. #include <stdio.h>
  67. #include <stdlib.h>
  68. #include <sys/stat.h>
  69. #include "slurm/slurm.h"
  70. #include "slurm/slurm_errno.h"
  71. #include "src/common/gres.h"
  72. #include "src/common/list.h"
  73. #include "src/common/macros.h"
  74. #include "src/common/pack.h"
  75. #include "src/common/parse_config.h"
  76. #include "src/common/plugin.h"
  77. #include "src/common/plugrack.h"
  78. #include "src/common/slurm_protocol_api.h"
  79. #include "src/common/xmalloc.h"
  80. #include "src/common/xstring.h"
  81. #define GRES_MAGIC 0x438a34d4
  82. /* Gres symbols provided by the plugin */
  /* Each member is a function pointer resolved from a loaded gres plugin.
   * _load_gres_plugin() fills this structure through a (void **) cast using
   * its syms[] name table, so the member order here must match that table.
   * A member may be NULL when no plugin was loaded for the gres type (see the
   * NULL test in gres_plugin_node_config_load()). */
  83. typedef struct slurm_gres_ops {
  84. int (*node_config_load) ( List gres_conf_list );
  85. void (*job_set_env) ( char ***job_env_ptr,
  86. void *gres_ptr );
  87. void (*step_set_env) ( char ***job_env_ptr,
  88. void *gres_ptr );
  89. void (*send_stepd) ( int fd );
  90. void (*recv_stepd) ( int fd );
  91. } slurm_gres_ops_t;
  92. /* Gres plugin context, one for each gres type */
  /* Entries live in the gres_context[] array built by gres_plugin_init().
   * gres_name, gres_name_colon and gres_type are xmalloc'ed strings released
   * by _unload_gres_plugin(). */
  93. typedef struct slurm_gres_context {
  94. plugin_handle_t cur_plugin;
  95. char * gres_name; /* name (e.g. "gpu") */
  96. char * gres_name_colon; /* name + colon (e.g. "gpu:") */
  97. int gres_name_colon_len; /* size of gres_name_colon */
  98. char * gres_type; /* plugin name (e.g. "gres/gpu") */
  99. bool has_file; /* found "File=" in slurm.conf */
  100. slurm_gres_ops_t ops; /* pointers to plugin symbols */
  101. uint32_t plugin_id; /* key for searches */
  102. plugrack_t plugin_list; /* plugrack info */
  103. } slurm_gres_context_t;
  104. /* Generic gres data structure for adding to a list. Depending upon the
  105. * context, gres_data points to gres_node_state_t, gres_job_state_t or
  106. * gres_step_state_t */
  /* plugin_id is the value compared by the _gres_find_id() list callback. */
  107. typedef struct gres_state {
  108. uint32_t plugin_id;
  109. void *gres_data;
  110. } gres_state_t;
  111. /* Local variables */
  112. static int gres_context_cnt = -1; /* entries in gres_context[]; -1 until gres_plugin_init(), reset to -1 by gres_plugin_fini() */
  113. static uint32_t gres_cpu_cnt = 0; /* set by gres_plugin_node_config_load(), read by _parse_gres_config() */
  114. static bool gres_debug = false; /* true when DEBUG_FLAG_GRES is set in the debug flags */
  115. static slurm_gres_context_t *gres_context = NULL; /* array of gres_context_cnt plugin contexts */
  116. static char *gres_plugin_list = NULL; /* value returned by slurm_get_gres_plugins() at init time */
  117. static pthread_mutex_t gres_context_lock = PTHREAD_MUTEX_INITIALIZER; /* taken around accesses to the variables in this group */
  118. static List gres_conf_list = NULL; /* list of gres_slurmd_conf_t records, freed with _destroy_gres_slurmd_conf() */
  119. static bool init_run = false; /* set once gres_plugin_init() has built gres_context[] */
  120. /* Local functions */
  /* Forward declarations of this file's helper functions.
   * NOTE(review): _job_alloc() and _job_test() are declared "extern" while
   * every other helper listed here is "static" -- confirm whether external
   * linkage is really intended for those two. */
  121. static gres_node_state_t *
  122. _build_gres_node_state(void);
  123. static uint32_t _build_id(char *gres_name);
  124. static bitstr_t *_cpu_bitmap_rebuild(bitstr_t *old_cpu_bitmap, int new_size);
  125. static void _destroy_gres_slurmd_conf(void *x);
  126. static uint32_t _get_gres_cnt(char *orig_config, char *gres_name,
  127. char *gres_name_colon, int gres_name_colon_len);
  128. static char * _get_gres_conf(void);
  129. static uint32_t _get_tot_gres_cnt(uint32_t plugin_id, uint32_t *set_cnt);
  130. static int _gres_find_id(void *x, void *key);
  131. static void _gres_job_list_delete(void *list_element);
  132. extern int _job_alloc(void *job_gres_data, void *node_gres_data,
  133. int node_cnt, int node_offset, uint32_t cpu_cnt,
  134. char *gres_name, uint32_t job_id, char *node_name,
  135. bitstr_t *core_bitmap);
  136. static int _job_config_validate(char *config, uint32_t *gres_cnt,
  137. slurm_gres_context_t *context_ptr);
  138. static int _job_dealloc(void *job_gres_data, void *node_gres_data,
  139. int node_offset, char *gres_name, uint32_t job_id,
  140. char *node_name);
  141. static void _job_state_delete(void *gres_data);
  142. static void * _job_state_dup(void *gres_data);
  143. static void * _job_state_dup2(void *gres_data, int node_index);
  144. static int _job_state_validate(char *config, void **gres_data,
  145. slurm_gres_context_t *gres_name);
  146. extern uint32_t _job_test(void *job_gres_data, void *node_gres_data,
  147. bool use_total_gres, bitstr_t *cpu_bitmap,
  148. int cpu_start_bit, int cpu_end_bit, bool *topo_set,
  149. uint32_t job_id, char *node_name, char *gres_name);
  150. static int _load_gres_plugin(char *plugin_name,
  151. slurm_gres_context_t *plugin_context);
  152. static int _log_gres_slurmd_conf(void *x, void *arg);
  153. static void _my_stat(char *file_name);
  154. static int _node_config_init(char *node_name, char *orig_config,
  155. slurm_gres_context_t *context_ptr,
  156. gres_state_t *gres_ptr);
  157. static int _node_reconfig(char *node_name, char *orig_config,
  158. char **new_config, gres_state_t *gres_ptr,
  159. uint16_t fast_schedule,
  160. slurm_gres_context_t *context_ptr);
  161. static void _node_state_dealloc(gres_state_t *gres_ptr);
  162. static void * _node_state_dup(void *gres_data);
  163. static void _node_state_log(void *gres_data, char *node_name,
  164. char *gres_name);
  165. static int _parse_gres_config(void **dest, slurm_parser_enum_t type,
  166. const char *key, const char *value,
  167. const char *line, char **leftover);
  168. static void _set_gres_cnt(char *orig_config, char **new_config,
  169. uint32_t new_cnt, char *gres_name,
  170. char *gres_name_colon, int gres_name_colon_len);
  171. static int _step_alloc(void *step_gres_data, void *job_gres_data,
  172. int node_offset, int cpu_cnt, char *gres_name,
  173. uint32_t job_id, uint32_t step_id);
  174. static int _step_dealloc(void *step_gres_data, void *job_gres_data,
  175. char *gres_name, uint32_t job_id,
  176. uint32_t step_id);
  177. static void * _step_state_dup(void *gres_data);
  178. static void * _step_state_dup2(void *gres_data, int node_index);
  179. static int _step_state_validate(char *config, void **gres_data,
  180. slurm_gres_context_t *context_ptr);
  181. static uint32_t _step_test(void *step_gres_data, void *job_gres_data,
  182. int node_offset, bool ignore_alloc, char *gres_name,
  183. uint32_t job_id, uint32_t step_id);
  184. static int _strcmp(const char *s1, const char *s2);
  185. static int _unload_gres_plugin(slurm_gres_context_t *plugin_context);
  186. static void _validate_config(slurm_gres_context_t *context_ptr);
  187. static int _validate_file(char *path_name, char *gres_name);
  188. static void _validate_gres_node_cpus(gres_node_state_t *node_gres_ptr,
  189. int cpus_ctld, char *node_name);
  /*
   * Convert a gres_name into a number for faster comparison operations.
   *
   * Each character is added into the result at a bit offset that cycles
   * through 0, 8, 16 and 24, so the value depends on both the characters and
   * their positions. Distinct names can still collide; gres_plugin_init()
   * checks the generated ids for duplicates.
   *
   * IN gres_name - NUL-terminated gres name (must not be NULL)
   * RET 32-bit identifier, 0 for an empty name
   */
  static uint32_t _build_id(char *gres_name)
  {
      uint32_t id = 0;
      int i, shift = 0;

      for (i = 0; gres_name[i]; i++) {
          /* Shift in an unsigned type: left-shifting a negative or
           * overflowing int is undefined behavior. Converting through
           * int32_t first keeps the sign extension of plain char, so
           * the ids are the same as those produced by shifting the
           * promoted int on two's complement targets. */
          id += ((uint32_t) (int32_t) gres_name[i]) << shift;
          shift = (shift + 8) % 32;
      }
      return id;
  }
  201. static int _gres_find_id(void *x, void *key)
  202. {
  203. uint32_t *plugin_id = (uint32_t *)key;
  204. gres_state_t *state_ptr = (gres_state_t *) x;
  205. if (state_ptr->plugin_id == *plugin_id)
  206. return 1;
  207. return 0;
  208. }
  /*
   * Variant of strcmp() that will accept NULL string pointers.
   * A NULL pointer sorts before any non-NULL string and two NULL pointers
   * compare equal; otherwise the result is that of strcmp().
   */
  static int _strcmp(const char *s1, const char *s2)
  {
      if (s1 == NULL)
          return (s2 == NULL) ? 0 : -1;
      if (s2 == NULL)
          return 1;
      return strcmp(s1, s2);
  }
  220. static int _load_gres_plugin(char *plugin_name,
  221. slurm_gres_context_t *plugin_context)
  222. {
  223. /*
  224. * Must be synchronized with slurm_gres_ops_t above.
  225. */
  226. static const char *syms[] = {
  227. "node_config_load",
  228. "job_set_env",
  229. "step_set_env",
  230. "send_stepd",
  231. "recv_stepd",
  232. };
  233. int n_syms = sizeof(syms) / sizeof(char *);
  234. /* Find the correct plugin */
  235. plugin_context->gres_type = xstrdup("gres/");
  236. xstrcat(plugin_context->gres_type, plugin_name);
  237. plugin_context->plugin_list = NULL;
  238. plugin_context->cur_plugin = PLUGIN_INVALID_HANDLE;
  239. plugin_context->cur_plugin = plugin_load_and_link(
  240. plugin_context->gres_type,
  241. n_syms, syms,
  242. (void **) &plugin_context->ops);
  243. if (plugin_context->cur_plugin != PLUGIN_INVALID_HANDLE)
  244. return SLURM_SUCCESS;
  245. if (errno != EPLUGIN_NOTFOUND) {
  246. error("Couldn't load specified plugin name for %s: %s",
  247. plugin_context->gres_type, plugin_strerror(errno));
  248. return SLURM_ERROR;
  249. }
  250. debug("gres: Couldn't find the specified plugin name for %s looking "
  251. "at all files", plugin_context->gres_type);
  252. /* Get plugin list */
  253. if (plugin_context->plugin_list == NULL) {
  254. char *plugin_dir;
  255. plugin_context->plugin_list = plugrack_create();
  256. if (plugin_context->plugin_list == NULL) {
  257. error("gres: cannot create plugin manager");
  258. return SLURM_ERROR;
  259. }
  260. plugrack_set_major_type(plugin_context->plugin_list,
  261. "gres");
  262. plugrack_set_paranoia(plugin_context->plugin_list,
  263. PLUGRACK_PARANOIA_NONE, 0);
  264. plugin_dir = slurm_get_plugin_dir();
  265. plugrack_read_dir(plugin_context->plugin_list, plugin_dir);
  266. xfree(plugin_dir);
  267. }
  268. plugin_context->cur_plugin = plugrack_use_by_type(
  269. plugin_context->plugin_list,
  270. plugin_context->gres_type );
  271. if (plugin_context->cur_plugin == PLUGIN_INVALID_HANDLE) {
  272. debug("Cannot find plugin of type %s, just track gres counts",
  273. plugin_context->gres_type);
  274. return SLURM_ERROR;
  275. }
  276. /* Dereference the API. */
  277. if (plugin_get_syms(plugin_context->cur_plugin,
  278. n_syms, syms,
  279. (void **) &plugin_context->ops ) < n_syms ) {
  280. error("Incomplete %s plugin detected",
  281. plugin_context->gres_type);
  282. return SLURM_ERROR;
  283. }
  284. return SLURM_SUCCESS;
  285. }
  286. static int _unload_gres_plugin(slurm_gres_context_t *plugin_context)
  287. {
  288. int rc;
  289. /*
  290. * Must check return code here because plugins might still
  291. * be loaded and active.
  292. */
  293. if (plugin_context->plugin_list)
  294. rc = plugrack_destroy(plugin_context->plugin_list);
  295. else {
  296. rc = SLURM_SUCCESS;
  297. plugin_unload(plugin_context->cur_plugin);
  298. }
  299. xfree(plugin_context->gres_name);
  300. xfree(plugin_context->gres_name_colon);
  301. xfree(plugin_context->gres_type);
  302. return rc;
  303. }
  304. /*
  305. * Initialize the gres plugin.
  306. *
  307. * Returns a SLURM errno.
  308. */
  309. extern int gres_plugin_init(void)
  310. {
  311. int i, j, rc = SLURM_SUCCESS;
  312. char *last = NULL, *names, *one_name, *full_name;
  313. if (init_run && (gres_context_cnt >= 0))
  314. return rc;
  315. slurm_mutex_lock(&gres_context_lock);
  316. if (slurm_get_debug_flags() & DEBUG_FLAG_GRES)
  317. gres_debug = true;
  318. else
  319. gres_debug = false;
  320. if (gres_context_cnt >= 0)
  321. goto fini;
  322. gres_plugin_list = slurm_get_gres_plugins();
  323. gres_context_cnt = 0;
  324. if ((gres_plugin_list == NULL) || (gres_plugin_list[0] == '\0'))
  325. goto fini;
  326. gres_context_cnt = 0;
  327. names = xstrdup(gres_plugin_list);
  328. one_name = strtok_r(names, ",", &last);
  329. while (one_name) {
  330. full_name = xstrdup("gres/");
  331. xstrcat(full_name, one_name);
  332. for (i=0; i<gres_context_cnt; i++) {
  333. if (!strcmp(full_name, gres_context[i].gres_type))
  334. break;
  335. }
  336. xfree(full_name);
  337. if (i<gres_context_cnt) {
  338. error("Duplicate plugin %s ignored",
  339. gres_context[i].gres_type);
  340. } else {
  341. xrealloc(gres_context, (sizeof(slurm_gres_context_t) *
  342. (gres_context_cnt + 1)));
  343. (void) _load_gres_plugin(one_name,
  344. gres_context +
  345. gres_context_cnt);
  346. /* Ignore return code.
  347. * Proceed to support gres even without the plugin */
  348. gres_context[gres_context_cnt].gres_name =
  349. xstrdup(one_name);
  350. gres_context[gres_context_cnt].plugin_id =
  351. _build_id(one_name);
  352. gres_context_cnt++;
  353. }
  354. one_name = strtok_r(NULL, ",", &last);
  355. }
  356. xfree(names);
  357. /* Insure that plugin_id is valid and unique */
  358. for (i=0; i<gres_context_cnt; i++) {
  359. for (j=i+1; j<gres_context_cnt; j++) {
  360. if (gres_context[i].plugin_id !=
  361. gres_context[j].plugin_id)
  362. continue;
  363. fatal("Gres: Duplicate plugin_id %u for %s and %s, "
  364. "change gres name for one of them",
  365. gres_context[i].plugin_id,
  366. gres_context[i].gres_type,
  367. gres_context[j].gres_type);
  368. }
  369. xassert(gres_context[i].gres_name);
  370. gres_context[i].gres_name_colon =
  371. xstrdup_printf("%s:", gres_context[i].gres_name);
  372. gres_context[i].gres_name_colon_len =
  373. strlen(gres_context[i].gres_name_colon);
  374. }
  375. init_run = true;
  376. fini: slurm_mutex_unlock(&gres_context_lock);
  377. return rc;
  378. }
  379. /*
  380. * Terminate the gres plugin. Free memory.
  381. *
  382. * Returns a SLURM errno.
  383. */
  384. extern int gres_plugin_fini(void)
  385. {
  386. int i, j, rc = SLURM_SUCCESS;
  387. slurm_mutex_lock(&gres_context_lock);
  388. if (gres_context_cnt < 0)
  389. goto fini;
  390. init_run = false;
  391. for (i=0; i<gres_context_cnt; i++) {
  392. j = _unload_gres_plugin(gres_context + i);
  393. if (j != SLURM_SUCCESS)
  394. rc = j;
  395. }
  396. xfree(gres_context);
  397. xfree(gres_plugin_list);
  398. FREE_NULL_LIST(gres_conf_list);
  399. gres_context_cnt = -1;
  400. fini: slurm_mutex_unlock(&gres_context_lock);
  401. return rc;
  402. }
  403. /*
  404. **************************************************************************
  405. * P L U G I N C A L L S *
  406. **************************************************************************
  407. */
  408. /*
  409. * Provide a plugin-specific help message for salloc, sbatch and srun
  410. * IN/OUT msg - buffer provided by caller and filled in by plugin
  411. * IN msg_size - size of msg buffer in bytes
  412. */
  413. extern int gres_plugin_help_msg(char *msg, int msg_size)
  414. {
  415. int i, rc;
  416. char *header = "Valid gres options are:\n";
  417. if (msg_size < 1)
  418. return EINVAL;
  419. msg[0] = '\0';
  420. rc = gres_plugin_init();
  421. if ((strlen(header) + 2) <= msg_size)
  422. strcat(msg, header);
  423. slurm_mutex_lock(&gres_context_lock);
  424. for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
  425. if ((strlen(msg) + strlen(gres_context[i].gres_name) + 9) >
  426. msg_size)
  427. break;
  428. strcat(msg, gres_context[i].gres_name);
  429. strcat(msg, "[:count]\n");
  430. }
  431. slurm_mutex_unlock(&gres_context_lock);
  432. return rc;
  433. }
  434. /*
  435. * Perform reconfig, re-read any configuration files
  436. * OUT did_change - set if gres configuration changed
  437. */
  438. extern int gres_plugin_reconfig(bool *did_change)
  439. {
  440. int rc = SLURM_SUCCESS;
  441. char *plugin_names = slurm_get_gres_plugins();
  442. bool plugin_change;
  443. if (did_change)
  444. *did_change = false;
  445. slurm_mutex_lock(&gres_context_lock);
  446. if (slurm_get_debug_flags() & DEBUG_FLAG_GRES)
  447. gres_debug = true;
  448. else
  449. gres_debug = false;
  450. if (_strcmp(plugin_names, gres_plugin_list))
  451. plugin_change = true;
  452. else
  453. plugin_change = false;
  454. slurm_mutex_unlock(&gres_context_lock);
  455. if (plugin_change) {
  456. error("GresPlugins changed from %s to %s ignored",
  457. gres_plugin_list, plugin_names);
  458. error("Restart the slurmctld daemon to change GresPlugins");
  459. if (did_change)
  460. *did_change = true;
  461. #if 0
  462. /* This logic would load new plugins, but we need the old
  463. * plugins to persist in order to process old state
  464. * information. */
  465. rc = gres_plugin_fini();
  466. if (rc == SLURM_SUCCESS)
  467. rc = gres_plugin_init();
  468. #endif
  469. }
  470. xfree(plugin_names);
  471. return rc;
  472. }
  473. /*
  474. * Return the pathname of the gres.conf file
  475. */
  476. static char *_get_gres_conf(void)
  477. {
  478. char *val = getenv("SLURM_CONF");
  479. char *rc = NULL;
  480. int i;
  481. if (!val)
  482. return xstrdup(GRES_CONFIG_FILE);
  483. /* Replace file name on end of path */
  484. i = strlen(val) - strlen("slurm.conf") + strlen("gres.conf") + 1;
  485. rc = xmalloc(i);
  486. strcpy(rc, val);
  487. val = strrchr(rc, (int)'/');
  488. if (val) /* absolute path */
  489. val++;
  490. else /* not absolute path */
  491. val = rc;
  492. strcpy(val, "gres.conf");
  493. return rc;
  494. }
  495. /*
  496. * Destroy a gres_slurmd_conf_t record, free it's memory
  497. */
  498. static void _destroy_gres_slurmd_conf(void *x)
  499. {
  500. gres_slurmd_conf_t *p = (gres_slurmd_conf_t *) x;
  501. xassert(p);
  502. xfree(p->cpus);
  503. xfree(p->file); /* Only used by slurmd */
  504. xfree(p->name);
  505. xfree(p);
  506. }
  507. /*
  508. * Log the contents of a gres_slurmd_conf_t record
  509. */
  510. static int _log_gres_slurmd_conf(void *x, void *arg)
  511. {
  512. gres_slurmd_conf_t *p;
  513. p = (gres_slurmd_conf_t *) x;
  514. xassert(p);
  515. if (!gres_debug) {
  516. verbose("Gres Name=%s Count=%u", p->name, p->count);
  517. return 0;
  518. }
  519. if (p->cpus) {
  520. info("Gres Name=%s Count=%u ID=%u File=%s CPUs=%s CpuCnt=%u",
  521. p->name, p->count, p->plugin_id, p->file, p->cpus,
  522. p->cpu_cnt);
  523. } else if (p->file) {
  524. info("Gres Name=%s Count=%u ID=%u File=%s",
  525. p->name, p->count, p->plugin_id, p->file);
  526. } else {
  527. info("Gres Name=%s Count=%u ID=%u", p->name, p->count,
  528. p->plugin_id);
  529. }
  530. return 0;
  531. }
  /*
   * Confirm that a file named in gres.conf can be stat()'ed.
   * Calls fatal() if it cannot.
   */
  static void _my_stat(char *file_name)
  {
      struct stat stat_buf;
      int stat_rc = stat(file_name, &stat_buf);

      if (stat_rc < 0)
          fatal("can't stat gres.conf file %s: %m", file_name);
  }
  538. static int _validate_file(char *path_name, char *gres_name)
  539. {
  540. char *file_name, *slash, *one_name, *root_path;
  541. char *formatted_path = NULL;
  542. hostlist_t hl;
  543. int i, file_count = 0;
  544. i = strlen(path_name);
  545. if ((i < 3) || (path_name[i-1] != ']')) {
  546. _my_stat(path_name);
  547. return 1;
  548. }
  549. slash = strrchr(path_name, '/');
  550. if (slash) {
  551. i = strlen(path_name);
  552. formatted_path = xmalloc(i+1);
  553. slash[0] = '\0';
  554. root_path = xstrdup(path_name);
  555. xstrcat(root_path, "/");
  556. slash[0] = '/';
  557. file_name = slash + 1;
  558. } else {
  559. file_name = path_name;
  560. root_path = NULL;
  561. }
  562. hl = hostlist_create(file_name);
  563. if (hl == NULL)
  564. fatal("can't parse File=%s", path_name);
  565. while ((one_name = hostlist_shift(hl))) {
  566. if (slash) {
  567. sprintf(formatted_path, "%s/%s", root_path, one_name);
  568. _my_stat(formatted_path);
  569. } else {
  570. _my_stat(one_name);
  571. }
  572. file_count++;
  573. free(one_name);
  574. }
  575. hostlist_destroy(hl);
  576. xfree(formatted_path);
  577. xfree(root_path);
  578. return file_count;
  579. }
  580. /*
  581. * Build gres_slurmd_conf_t record based upon a line from the gres.conf file
  582. */
  583. static int _parse_gres_config(void **dest, slurm_parser_enum_t type,
  584. const char *key, const char *value,
  585. const char *line, char **leftover)
  586. {
  587. static s_p_options_t _gres_options[] = {
  588. {"Count", S_P_STRING}, /* Number of Gres available */
  589. {"CPUs" , S_P_STRING}, /* CPUs to bind to Gres resource */
  590. {"File", S_P_STRING}, /* Path to Gres device */
  591. {NULL}
  592. };
  593. int i;
  594. s_p_hashtbl_t *tbl;
  595. gres_slurmd_conf_t *p;
  596. long tmp_long;
  597. char *tmp_str, *last;
  598. tbl = s_p_hashtbl_create(_gres_options);
  599. s_p_parse_line(tbl, *leftover, leftover);
  600. p = xmalloc(sizeof(gres_slurmd_conf_t));
  601. p->name = xstrdup(value);
  602. p->cpu_cnt = gres_cpu_cnt;
  603. if (s_p_get_string(&p->cpus, "CPUs", tbl)) {
  604. bitstr_t *cpu_bitmap; /* Just use to validate config */
  605. cpu_bitmap = bit_alloc(gres_cpu_cnt);
  606. if (cpu_bitmap == NULL)
  607. fatal("bit_alloc: malloc failure");
  608. i = bit_unfmt(cpu_bitmap, p->cpus);
  609. if (i != 0) {
  610. fatal("Invalid gres data for %s, CPUs=%s (only %u CPUs"
  611. " are available)",
  612. p->name, p->cpus, gres_cpu_cnt);
  613. }
  614. FREE_NULL_BITMAP(cpu_bitmap);
  615. }
  616. if (s_p_get_string(&p->file, "File", tbl)) {
  617. p->count = _validate_file(p->file, p->name);
  618. p->has_file = 1;
  619. }
  620. if (s_p_get_string(&tmp_str, "Count", tbl)) {
  621. tmp_long = strtol(tmp_str, &last, 10);
  622. if ((tmp_long == LONG_MIN) || (tmp_long == LONG_MAX)) {
  623. fatal("Invalid gres data for %s, Count=%s", p->name,
  624. tmp_str);
  625. }
  626. if ((last[0] == 'k') || (last[0] == 'K'))
  627. tmp_long *= 1024;
  628. else if ((last[0] == 'm') || (last[0] == 'M'))
  629. tmp_long *= (1024 * 1024);
  630. else if ((last[0] == 'g') || (last[0] == 'G'))
  631. tmp_long *= (1024 * 1024 * 1024);
  632. else if (last[0] != '\0') {
  633. fatal("Invalid gres data for %s, Count=%s", p->name,
  634. tmp_str);
  635. }
  636. if (p->count && (p->count != tmp_long)) {
  637. fatal("Invalid gres data for %s, Count does not match "
  638. "File value", p->name);
  639. }
  640. if ((tmp_long < 0) || (tmp_long >= NO_VAL)) {
  641. fatal("Gres %s has invalid count value %ld",
  642. p->name, tmp_long);
  643. }
  644. p->count = tmp_long;
  645. xfree(tmp_str);
  646. } else if (p->count == 0)
  647. p->count = 1;
  648. s_p_hashtbl_destroy(tbl);
  649. for (i=0; i<gres_context_cnt; i++) {
  650. if (strcasecmp(value, gres_context[i].gres_name) == 0)
  651. break;
  652. }
  653. if (i >= gres_context_cnt) {
  654. error("Ignoring gres.conf Name=%s", value);
  655. _destroy_gres_slurmd_conf(p);
  656. return 0;
  657. }
  658. p->plugin_id = gres_context[i].plugin_id;
  659. *dest = (void *)p;
  660. return 1;
  661. }
  662. static void _validate_config(slurm_gres_context_t *context_ptr)
  663. {
  664. ListIterator iter;
  665. gres_slurmd_conf_t *gres_slurmd_conf;
  666. int has_file = -1, rec_count = 0;
  667. iter = list_iterator_create(gres_conf_list);
  668. if (iter == NULL)
  669. fatal("list_iterator_create: malloc failure");
  670. while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) {
  671. if (gres_slurmd_conf->plugin_id != context_ptr->plugin_id)
  672. continue;
  673. rec_count++;
  674. if (has_file == -1)
  675. has_file = (int) gres_slurmd_conf->has_file;
  676. else if (( has_file && !gres_slurmd_conf->has_file) ||
  677. (!has_file && gres_slurmd_conf->has_file)) {
  678. fatal("gres.conf for %s, some records have File "
  679. "specification while others do not",
  680. context_ptr->gres_name);
  681. }
  682. if ((has_file == 0) && (rec_count > 1)) {
  683. fatal("gres.conf duplicate records for %s",
  684. context_ptr->gres_name);
  685. }
  686. }
  687. list_iterator_destroy(iter);
  688. }
  689. extern int gres_plugin_node_config_devices_path(char **dev_path,
  690. char **gres_name,
  691. int array_len)
  692. {
  693. static s_p_options_t _gres_options[] = {
  694. {"Name", S_P_ARRAY, _parse_gres_config, NULL},
  695. {NULL}
  696. };
  697. int count, i;
  698. struct stat config_stat;
  699. s_p_hashtbl_t *tbl;
  700. gres_slurmd_conf_t **gres_array;
  701. char *gres_conf_file;
  702. gres_plugin_init();
  703. gres_conf_file = _get_gres_conf();
  704. if (stat(gres_conf_file, &config_stat) < 0) {
  705. error("can't stat gres.conf file %s: %m", gres_conf_file);
  706. xfree(gres_conf_file);
  707. return 0;
  708. }
  709. slurm_mutex_lock(&gres_context_lock);
  710. tbl = s_p_hashtbl_create(_gres_options);
  711. if (s_p_parse_file(tbl, NULL, gres_conf_file, false) == SLURM_ERROR)
  712. fatal("error opening/reading %s", gres_conf_file);
  713. FREE_NULL_LIST(gres_conf_list);
  714. gres_conf_list = list_create(_destroy_gres_slurmd_conf);
  715. if (gres_conf_list == NULL)
  716. fatal("list_create: malloc failure");
  717. if (s_p_get_array((void ***) &gres_array, &count, "Name", tbl)) {
  718. if (count > array_len) {
  719. error("GRES device count exceeds array size (%d > %d)",
  720. count, array_len);
  721. count = array_len;
  722. }
  723. for (i = 0; i < count; i++) {
  724. if ((gres_array[i]) && (gres_array[i]->file)) {
  725. dev_path[i] = gres_array[i]->file;
  726. gres_name[i] = gres_array[i]->name;
  727. gres_array[i] = NULL;
  728. }
  729. }
  730. }
  731. s_p_hashtbl_destroy(tbl);
  732. slurm_mutex_unlock(&gres_context_lock);
  733. xfree(gres_conf_file);
  734. return count;
  735. }
  736. /* No gres.conf file found.
  737. * Initialize gres table with zero counts of all resources.
  738. * Counts can be altered by node_config_load() in the gres plugin. */
  739. static int _no_gres_conf(uint32_t cpu_cnt)
  740. {
  741. int i, rc = SLURM_SUCCESS;
  742. gres_slurmd_conf_t *p;
  743. slurm_mutex_lock(&gres_context_lock);
  744. FREE_NULL_LIST(gres_conf_list);
  745. gres_conf_list = list_create(_destroy_gres_slurmd_conf);
  746. if (gres_conf_list == NULL)
  747. fatal("list_create: malloc failure");
  748. p = xmalloc(sizeof(gres_slurmd_conf_t *) * gres_context_cnt);
  749. for (i = 0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
  750. p = xmalloc(sizeof(gres_slurmd_conf_t));
  751. p->cpu_cnt = cpu_cnt;
  752. p->name = xstrdup(gres_context[i].gres_name);
  753. p->plugin_id = gres_context[i].plugin_id;
  754. list_append(gres_conf_list, p);
  755. rc = (*(gres_context[i].ops.node_config_load))(gres_conf_list);
  756. }
  757. slurm_mutex_unlock(&gres_context_lock);
  758. return rc;
  759. }
  760. /*
  761. * Load this node's configuration (how many resources it has, topology, etc.)
  762. * IN cpu_cnt - Number of CPUs on configured on this node
  763. */
  764. extern int gres_plugin_node_config_load(uint32_t cpu_cnt)
  765. {
  766. static s_p_options_t _gres_options[] = {
  767. {"Name", S_P_ARRAY, _parse_gres_config, NULL},
  768. {NULL}
  769. };
  770. int count, i, rc;
  771. struct stat config_stat;
  772. s_p_hashtbl_t *tbl;
  773. gres_slurmd_conf_t **gres_array;
  774. char *gres_conf_file;
  775. rc = gres_plugin_init();
  776. if (gres_context_cnt == 0)
  777. return SLURM_SUCCESS;
  778. gres_conf_file = _get_gres_conf();
  779. if (stat(gres_conf_file, &config_stat) < 0) {
  780. error("can't stat gres.conf file %s, assuming zero resource "
  781. "counts", gres_conf_file);
  782. xfree(gres_conf_file);
  783. return _no_gres_conf(cpu_cnt);
  784. }
  785. slurm_mutex_lock(&gres_context_lock);
  786. gres_cpu_cnt = cpu_cnt;
  787. tbl = s_p_hashtbl_create(_gres_options);
  788. if (s_p_parse_file(tbl, NULL, gres_conf_file, false) == SLURM_ERROR)
  789. fatal("error opening/reading %s", gres_conf_file);
  790. FREE_NULL_LIST(gres_conf_list);
  791. gres_conf_list = list_create(_destroy_gres_slurmd_conf);
  792. if (gres_conf_list == NULL)
  793. fatal("list_create: malloc failure");
  794. if (s_p_get_array((void ***) &gres_array, &count, "Name", tbl)) {
  795. for (i = 0; i < count; i++) {
  796. list_append(gres_conf_list, gres_array[i]);
  797. gres_array[i] = NULL;
  798. }
  799. }
  800. s_p_hashtbl_destroy(tbl);
  801. list_for_each(gres_conf_list, _log_gres_slurmd_conf, NULL);
  802. for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
  803. _validate_config(&gres_context[i]);
  804. if (gres_context[i].ops.node_config_load == NULL)
  805. continue; /* No plugin */
  806. rc = (*(gres_context[i].ops.node_config_load))(gres_conf_list);
  807. }
  808. slurm_mutex_unlock(&gres_context_lock);
  809. xfree(gres_conf_file);
  810. return rc;
  811. }
  812. /*
  813. * Pack this node's gres configuration into a buffer
  814. * IN/OUT buffer - message buffer to pack
  815. */
  816. extern int gres_plugin_node_config_pack(Buf buffer)
  817. {
  818. int rc;
  819. uint32_t magic = GRES_MAGIC;
  820. uint16_t rec_cnt = 0, version= SLURM_PROTOCOL_VERSION;
  821. ListIterator iter;
  822. gres_slurmd_conf_t *gres_slurmd_conf;
  823. rc = gres_plugin_init();
  824. slurm_mutex_lock(&gres_context_lock);
  825. pack16(version, buffer);
  826. if (gres_conf_list)
  827. rec_cnt = list_count(gres_conf_list);
  828. pack16(rec_cnt, buffer);
  829. if (rec_cnt) {
  830. iter = list_iterator_create(gres_conf_list);
  831. if (iter == NULL)
  832. fatal("list_iterator_create: malloc failure");
  833. while ((gres_slurmd_conf =
  834. (gres_slurmd_conf_t *) list_next(iter))) {
  835. pack32(magic, buffer);
  836. pack32(gres_slurmd_conf->count, buffer);
  837. pack32(gres_slurmd_conf->cpu_cnt, buffer);
  838. pack8(gres_slurmd_conf->has_file, buffer);
  839. pack32(gres_slurmd_conf->plugin_id, buffer);
  840. packstr(gres_slurmd_conf->cpus, buffer);
  841. packstr(gres_slurmd_conf->name, buffer);
  842. }
  843. list_iterator_destroy(iter);
  844. }
  845. slurm_mutex_unlock(&gres_context_lock);
  846. return rc;
  847. }
  848. /*
  849. * Unpack this node's configuration from a buffer (built/packed by slurmd)
  850. * IN/OUT buffer - message buffer to unpack
  851. * IN node_name - name of node whose data is being unpacked
  852. */
  853. extern int gres_plugin_node_config_unpack(Buf buffer, char* node_name)
  854. {
  855. int i, j, rc;
  856. uint32_t count, cpu_cnt, magic, plugin_id, utmp32;
  857. uint16_t rec_cnt, version;
  858. uint8_t has_file;
  859. char *tmp_cpus, *tmp_name;
  860. gres_slurmd_conf_t *p;
  861. rc = gres_plugin_init();
  862. FREE_NULL_LIST(gres_conf_list);
  863. gres_conf_list = list_create(_destroy_gres_slurmd_conf);
  864. if (gres_conf_list == NULL)
  865. fatal("list_create: malloc failure");
  866. safe_unpack16(&version, buffer);
  867. if (version != SLURM_PROTOCOL_VERSION)
  868. return SLURM_ERROR;
  869. safe_unpack16(&rec_cnt, buffer);
  870. if (rec_cnt == 0)
  871. return SLURM_SUCCESS;
  872. slurm_mutex_lock(&gres_context_lock);
  873. for (i=0; i<rec_cnt; i++) {
  874. safe_unpack32(&magic, buffer);
  875. if (magic != GRES_MAGIC)
  876. goto unpack_error;
  877. safe_unpack32(&count, buffer);
  878. safe_unpack32(&cpu_cnt, buffer);
  879. safe_unpack8(&has_file, buffer);
  880. safe_unpack32(&plugin_id, buffer);
  881. safe_unpackstr_xmalloc(&tmp_cpus, &utmp32, buffer);
  882. safe_unpackstr_xmalloc(&tmp_name, &utmp32, buffer);
  883. for (j=0; j<gres_context_cnt; j++) {
  884. if (gres_context[j].plugin_id != plugin_id)
  885. continue;
  886. if (strcmp(gres_context[j].gres_name, tmp_name)) {
  887. /* Should be caught in gres_plugin_init() */
  888. error("gres_plugin_node_config_unpack: gres/%s"
  889. " duplicate plugin ID with %s, unable "
  890. "to process",
  891. tmp_name, gres_context[j].gres_name);
  892. continue;
  893. }
  894. if (gres_context[j].has_file && !has_file && count) {
  895. error("gres_plugin_node_config_unpack: gres/%s"
  896. " lacks File parameter for node %s",
  897. tmp_name, node_name);
  898. has_file = 1;
  899. }
  900. if (has_file && (count > 1024)) {
  901. /* Avoid over-subscribing memory with huge
  902. * bitmaps */
  903. error("gres_plugin_node_config_unpack: gres/%s"
  904. " has File plus very large Count (%u) "
  905. "for node %s, resetting value to 1024",
  906. tmp_name, count, node_name);
  907. count = 1024;
  908. }
  909. gres_context[j].has_file = has_file;
  910. break;
  911. }
  912. if (j >= gres_context_cnt) {
  913. /* A sign that GresPlugins is inconsistently
  914. * configured. Not a fatal error. Skip this data. */
  915. error("gres_plugin_node_config_unpack: no plugin "
  916. "configured to unpack data type %s from node %s",
  917. tmp_name, node_name);
  918. xfree(tmp_cpus);
  919. xfree(tmp_name);
  920. continue;
  921. }
  922. p = xmalloc(sizeof(gres_slurmd_conf_t));
  923. p->count = count;
  924. p->cpu_cnt = cpu_cnt;
  925. p->has_file = has_file;
  926. p->cpus = tmp_cpus;
  927. tmp_cpus = NULL; /* Nothing left to xfree */
  928. p->name = tmp_name; /* We need to preserve for accounting! */
  929. p->plugin_id = plugin_id;
  930. list_append(gres_conf_list, p);
  931. }
  932. slurm_mutex_unlock(&gres_context_lock);
  933. return rc;
  934. unpack_error:
  935. error("gres_plugin_node_config_unpack: unpack error from node %s",
  936. node_name);
  937. xfree(tmp_cpus);
  938. xfree(tmp_name);
  939. slurm_mutex_unlock(&gres_context_lock);
  940. return SLURM_ERROR;
  941. }
  942. /*
  943. * Delete an element placed on gres_list by _node_config_validate()
  944. * free associated memory
  945. */
  946. static void _gres_node_list_delete(void *list_element)
  947. {
  948. int i;
  949. gres_state_t *gres_ptr;
  950. gres_node_state_t *gres_node_ptr;
  951. gres_ptr = (gres_state_t *) list_element;
  952. gres_node_ptr = (gres_node_state_t *) gres_ptr->gres_data;
  953. FREE_NULL_BITMAP(gres_node_ptr->gres_bit_alloc);
  954. for (i=0; i<gres_node_ptr->topo_cnt; i++) {
  955. FREE_NULL_BITMAP(gres_node_ptr->topo_cpus_bitmap[i]);
  956. FREE_NULL_BITMAP(gres_node_ptr->topo_gres_bitmap[i]);
  957. }
  958. xfree(gres_node_ptr->topo_cpus_bitmap);
  959. xfree(gres_node_ptr->topo_gres_bitmap);
  960. xfree(gres_node_ptr->topo_gres_cnt_alloc);
  961. xfree(gres_node_ptr->topo_gres_cnt_avail);
  962. xfree(gres_node_ptr);
  963. xfree(gres_ptr);
  964. }
  /*
   * Extract the count configured for one gres type from a comma separated
   * gres string.
   * IN orig_config - gres specification, may be NULL
   * IN gres_name - gres type name; a bare match yields a count of 1
   * IN gres_name_colon - gres type name followed by ':'
   * IN gres_name_colon_len - length of gres_name_colon
   * RET the count from the first matching entry, scaled by an optional
   *     k/K, m/M or g/G suffix, or 0 if no entry matches
   */
  static uint32_t _get_gres_cnt(char *orig_config, char *gres_name,
                                char *gres_name_colon, int gres_name_colon_len)
  {
      char *config_copy, *entry, *num_end = NULL, *save_ptr = NULL;
      uint32_t cnt = 0;

      if (!orig_config)
          return cnt;

      /* strtok_r() writes into its input, so tokenize a private copy */
      config_copy = xstrdup(orig_config);
      for (entry = strtok_r(config_copy, ",", &save_ptr); entry;
           entry = strtok_r(NULL, ",", &save_ptr)) {
          if (strcmp(entry, gres_name) == 0) {
              cnt = 1;
              break;
          }
          if (strncmp(entry, gres_name_colon, gres_name_colon_len) != 0)
              continue;

          cnt = strtol(entry + gres_name_colon_len, &num_end, 10);
          switch (num_end[0]) {
          case 'k':
          case 'K':
              cnt *= 1024;
              break;
          case 'm':
          case 'M':
              cnt *= (1024 * 1024);
              break;
          case 'g':
          case 'G':
              cnt *= (1024 * 1024 * 1024);
              break;
          default:
              /* No suffix, or one that is not recognized */
              break;
          }
          break;    /* only the first matching entry is used */
      }
      xfree(config_copy);

      return cnt;
  }
  /*
   * Rewrite a node's gres string so that the entry for one gres type
   * carries a new count; all other entries are copied through unchanged.
   * The count is written with the largest of the G, M or K suffixes that
   * divides it evenly (a count of 0 is therefore written as "0G").
   * IN orig_config - gres string from slurm.conf, used only when
   *                  *new_config is NULL
   * IN/OUT new_config - current rewritten string (preferred as input when
   *                     set); freed and replaced by the result
   * IN new_cnt - count to record for gres_name
   * IN gres_name - gres type name
   * IN gres_name_colon - gres type name followed by ':'
   * IN gres_name_colon_len - length of gres_name_colon
   */
  static void _set_gres_cnt(char *orig_config, char **new_config,
                            uint32_t new_cnt, char *gres_name,
                            char *gres_name_colon, int gres_name_colon_len)
  {
      char *new_configured_res = NULL, *node_gres_config;
      char *last_tok = NULL, *tok;

      if (*new_config)
          node_gres_config = xstrdup(*new_config);
      else if (orig_config)
          node_gres_config = xstrdup(orig_config);
      else
          return;

      tok = strtok_r(node_gres_config, ",", &last_tok);
      while (tok) {
          if (new_configured_res)
              xstrcat(new_configured_res, ",");
          /* new_cnt is deliberately left untouched below: scaling it in
           * place would make any further matching token print an
           * already-divided value */
          if (strcmp(tok, gres_name) &&
              strncmp(tok, gres_name_colon, gres_name_colon_len)) {
              xstrcat(new_configured_res, tok);
          } else if ((new_cnt % (1024 * 1024 * 1024)) == 0) {
              xstrfmtcat(new_configured_res, "%s:%uG",
                         gres_name, new_cnt / (1024 * 1024 * 1024));
          } else if ((new_cnt % (1024 * 1024)) == 0) {
              xstrfmtcat(new_configured_res, "%s:%uM",
                         gres_name, new_cnt / (1024 * 1024));
          } else if ((new_cnt % 1024) == 0) {
              xstrfmtcat(new_configured_res, "%s:%uK",
                         gres_name, new_cnt / 1024);
          } else {
              xstrfmtcat(new_configured_res, "%s:%u",
                         gres_name, new_cnt);
          }
          tok = strtok_r(NULL, ",", &last_tok);
      }
      xfree(node_gres_config);

      xfree(*new_config);
      *new_config = new_configured_res;
  }
  1038. static gres_node_state_t *_build_gres_node_state(void)
  1039. {
  1040. gres_node_state_t *gres_data;
  1041. gres_data = xmalloc(sizeof(gres_node_state_t));
  1042. gres_data->gres_cnt_config = NO_VAL;
  1043. gres_data->gres_cnt_found = NO_VAL;
  1044. return gres_data;
  1045. }
  1046. /*
  1047. * Build a node's gres record based only upon the slurm.conf contents
  1048. */
  1049. static int _node_config_init(char *node_name, char *orig_config,
  1050. slurm_gres_context_t *context_ptr,
  1051. gres_state_t *gres_ptr)
  1052. {
  1053. int rc = SLURM_SUCCESS;
  1054. uint32_t gres_config_cnt = 0;
  1055. bool updated_config = false;
  1056. gres_node_state_t *gres_data;
  1057. if (gres_ptr->gres_data == NULL) {
  1058. gres_ptr->gres_data = _build_gres_node_state();
  1059. updated_config = true;
  1060. }
  1061. gres_data = (gres_node_state_t *) gres_ptr->gres_data;
  1062. /* If the resource isn't configured for use with this node*/
  1063. if ((orig_config == NULL) || (orig_config[0] == '\0') ||
  1064. (updated_config == false)) {
  1065. gres_data->gres_cnt_config = 0;
  1066. return rc;
  1067. }
  1068. gres_config_cnt = _get_gres_cnt(orig_config,
  1069. context_ptr->gres_name,
  1070. context_ptr->gres_name_colon,
  1071. context_ptr->gres_name_colon_len);
  1072. gres_data->gres_cnt_config = gres_config_cnt;
  1073. /* Use count from recovered state, if higher */
  1074. gres_data->gres_cnt_avail = MAX(gres_data->gres_cnt_avail,
  1075. gres_config_cnt);
  1076. if ((gres_data->gres_bit_alloc != NULL) &&
  1077. (gres_data->gres_cnt_avail >
  1078. bit_size(gres_data->gres_bit_alloc))) {
  1079. gres_data->gres_bit_alloc =
  1080. bit_realloc(gres_data->gres_bit_alloc,
  1081. gres_data->gres_cnt_avail);
  1082. if (gres_data->gres_bit_alloc == NULL)
  1083. fatal("bit_alloc: malloc failure");
  1084. }
  1085. return rc;
  1086. }
  1087. /*
  1088. * Build a node's gres record based only upon the slurm.conf contents
  1089. * IN node_name - name of the node for which the gres information applies
  1090. * IN orig_config - Gres information supplied from slurm.conf
  1091. * IN/OUT gres_list - List of Gres records for this node to track usage
  1092. */
  1093. extern int gres_plugin_init_node_config(char *node_name, char *orig_config,
  1094. List *gres_list)
  1095. {
  1096. int i, rc;
  1097. ListIterator gres_iter;
  1098. gres_state_t *gres_ptr;
  1099. rc = gres_plugin_init();
  1100. slurm_mutex_lock(&gres_context_lock);
  1101. if ((gres_context_cnt > 0) && (*gres_list == NULL)) {
  1102. *gres_list = list_create(_gres_node_list_delete);
  1103. if (*gres_list == NULL)
  1104. fatal("list_create malloc failure");
  1105. }
  1106. for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
  1107. /* Find or create gres_state entry on the list */
  1108. gres_iter = list_iterator_create(*gres_list);
  1109. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  1110. if (gres_ptr->plugin_id == gres_context[i].plugin_id)
  1111. break;
  1112. }
  1113. list_iterator_destroy(gres_iter);
  1114. if (gres_ptr == NULL) {
  1115. gres_ptr = xmalloc(sizeof(gres_state_t));
  1116. gres_ptr->plugin_id = gres_context[i].plugin_id;
  1117. list_append(*gres_list, gres_ptr);
  1118. }
  1119. rc = _node_config_init(node_name, orig_config,
  1120. &gres_context[i], gres_ptr);
  1121. }
  1122. slurm_mutex_unlock(&gres_context_lock);
  1123. return rc;
  1124. }
  1125. /*
  1126. * Determine gres availability on some node
  1127. * plugin_id IN - plugin number to search for
  1128. * set_cnt OUT - count of gres.conf records of this id found by slurmd
  1129. * (each can have different topology)
  1130. * RET - total number of gres available of this ID on this node in (sum
  1131. * across all records of this ID)
  1132. */
  1133. static uint32_t _get_tot_gres_cnt(uint32_t plugin_id, uint32_t *set_cnt)
  1134. {
  1135. ListIterator iter;
  1136. gres_slurmd_conf_t *gres_slurmd_conf;
  1137. uint32_t gres_cnt = 0, cpu_set_cnt = 0, rec_cnt = 0;
  1138. xassert(set_cnt);
  1139. *set_cnt = 0;
  1140. if (gres_conf_list == NULL)
  1141. return gres_cnt;
  1142. iter = list_iterator_create(gres_conf_list);
  1143. if (iter == NULL)
  1144. fatal("list_iterator_create: malloc failure");
  1145. while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) {
  1146. if (gres_slurmd_conf->plugin_id != plugin_id)
  1147. continue;
  1148. gres_cnt += gres_slurmd_conf->count;
  1149. rec_cnt++;
  1150. if (gres_slurmd_conf->cpus)
  1151. cpu_set_cnt++;
  1152. }
  1153. list_iterator_destroy(iter);
  1154. if (cpu_set_cnt)
  1155. *set_cnt = rec_cnt;
  1156. return gres_cnt;
  1157. }
  1158. /*
  1159. * Map a given GRES type ID back to a GRES type name.
  1160. * gres_id IN - GRES type ID to search for.
  1161. * gres_name IN - Pre-allocated string in which to store the GRES type name.
  1162. * gres_name_len - Size of gres_name in bytes
  1163. * RET - error code (currently not used--always return SLURM_SUCCESS)
  1164. */
  1165. extern int gres_gresid_to_gresname(uint32_t gres_id, char* gres_name,
  1166. int gres_name_len)
  1167. {
  1168. ListIterator iter;
  1169. gres_slurmd_conf_t *gres_slurmd_conf;
  1170. int rc = SLURM_SUCCESS;
  1171. int found = 0;
  1172. if (gres_conf_list == NULL) {
  1173. /* Should not reach this as if there are GRES id's then there
  1174. * must have been a gres_conf_list.
  1175. */
  1176. info("%s--The gres_conf_list is NULL!!!\n", __FUNCTION__);
  1177. snprintf(gres_name, gres_name_len, "%u", gres_id);
  1178. return rc;
  1179. }
  1180. iter = list_iterator_create(gres_conf_list);
  1181. if (iter == NULL)
  1182. fatal("list_iterator_create: malloc failure");
  1183. while ((gres_slurmd_conf = (gres_slurmd_conf_t *) list_next(iter))) {
  1184. if (gres_slurmd_conf->plugin_id != gres_id)
  1185. continue;
  1186. strncpy(gres_name, gres_slurmd_conf->name, gres_name_len);
  1187. found = 1;
  1188. break;
  1189. }
  1190. list_iterator_destroy(iter);
  1191. if (!found) /* Could not find GRES type name, use id */
  1192. snprintf(gres_name, gres_name_len, "%u", gres_id);
  1193. return rc;
  1194. }
  1195. extern int _node_config_validate(char *node_name, char *orig_config,
  1196. char **new_config, gres_state_t *gres_ptr,
  1197. uint16_t fast_schedule, char **reason_down,
  1198. slurm_gres_context_t *context_ptr)
  1199. {
  1200. int i, j, gres_inx, rc = SLURM_SUCCESS;
  1201. uint32_t gres_cnt, set_cnt = 0;
  1202. bool updated_config = false;
  1203. gres_node_state_t *gres_data;
  1204. ListIterator iter;
  1205. gres_slurmd_conf_t *gres_slurmd_conf;
  1206. if (gres_ptr->gres_data == NULL)
  1207. gres_ptr->gres_data = _build_gres_node_state();
  1208. gres_data = (gres_node_state_t *) gres_ptr->gres_data;
  1209. gres_cnt = _get_tot_gres_cnt(context_ptr->plugin_id, &set_cnt);
  1210. if (gres_data->gres_cnt_found != gres_cnt) {
  1211. if (gres_data->gres_cnt_found != NO_VAL) {
  1212. info("%s: count changed for node %s from %u to %u",
  1213. context_ptr->gres_type, node_name,
  1214. gres_data->gres_cnt_found, gres_cnt);
  1215. }
  1216. gres_data->gres_cnt_found = gres_cnt;
  1217. updated_config = true;
  1218. }
  1219. if (updated_config == false)
  1220. return SLURM_SUCCESS;
  1221. if ((set_cnt == 0) && (set_cnt != gres_data->topo_cnt)) {
  1222. /* Need to clear topology info */
  1223. xfree(gres_data->topo_gres_cnt_alloc);
  1224. xfree(gres_data->topo_gres_cnt_avail);
  1225. for (i=0; i<gres_data->topo_cnt; i++) {
  1226. FREE_NULL_BITMAP(gres_data->topo_gres_bitmap[i]);
  1227. FREE_NULL_BITMAP(gres_data->topo_cpus_bitmap[i]);
  1228. }
  1229. xfree(gres_data->topo_gres_bitmap);
  1230. xfree(gres_data->topo_cpus_bitmap);
  1231. gres_data->topo_cnt = set_cnt;
  1232. }
  1233. if (context_ptr->has_file && (set_cnt != gres_data->topo_cnt)) {
  1234. /* Need to rebuild topology info */
  1235. /* Resize the data structures here */
  1236. gres_data->topo_gres_cnt_alloc =
  1237. xrealloc(gres_data->topo_gres_cnt_alloc,
  1238. set_cnt * sizeof(uint32_t));
  1239. if (gres_data->topo_gres_cnt_alloc == NULL)
  1240. fatal("xrealloc: malloc failure");
  1241. gres_data->topo_gres_cnt_avail =
  1242. xrealloc(gres_data->topo_gres_cnt_avail,
  1243. set_cnt * sizeof(uint32_t));
  1244. if (gres_data->topo_gres_cnt_alloc == NULL)
  1245. fatal("xrealloc: malloc failure");
  1246. for (i=0; i<gres_data->topo_cnt; i++)
  1247. FREE_NULL_BITMAP(gres_data->topo_gres_bitmap[i]);
  1248. gres_data->topo_gres_bitmap =
  1249. xrealloc(gres_data->topo_gres_bitmap,
  1250. set_cnt * sizeof(bitstr_t *));
  1251. if (gres_data->topo_gres_bitmap == NULL)
  1252. fatal("xrealloc: malloc failure");
  1253. for (i=0; i<gres_data->topo_cnt; i++)
  1254. FREE_NULL_BITMAP(gres_data->topo_cpus_bitmap[i]);
  1255. gres_data->topo_cpus_bitmap =
  1256. xrealloc(gres_data->topo_cpus_bitmap,
  1257. set_cnt * sizeof(bitstr_t *));
  1258. if (gres_data->topo_cpus_bitmap == NULL)
  1259. fatal("xrealloc: malloc failure");
  1260. gres_data->topo_cnt = set_cnt;
  1261. iter = list_iterator_create(gres_conf_list);
  1262. if (iter == NULL)
  1263. fatal("list_iterator_create: malloc failure");
  1264. gres_inx = i = 0;
  1265. while ((gres_slurmd_conf = (gres_slurmd_conf_t *)
  1266. list_next(iter))) {
  1267. if (gres_slurmd_conf->plugin_id !=
  1268. context_ptr->plugin_id)
  1269. continue;
  1270. gres_data->topo_gres_cnt_avail[i] =
  1271. gres_slurmd_conf->count;
  1272. gres_data->topo_cpus_bitmap[i] =
  1273. bit_alloc(gres_slurmd_conf->cpu_cnt);
  1274. if (gres_data->topo_cpus_bitmap[i] == NULL)
  1275. fatal("bit_alloc: malloc failure");
  1276. if (gres_slurmd_conf->cpus) {
  1277. bit_unfmt(gres_data->topo_cpus_bitmap[i],
  1278. gres_slurmd_conf->cpus);
  1279. } else {
  1280. error("%s: has CPUs configured for only some "
  1281. "of the records on node %s",
  1282. context_ptr->gres_type, node_name);
  1283. bit_nset(gres_data->topo_cpus_bitmap[i], 0,
  1284. (gres_slurmd_conf->cpu_cnt - 1));
  1285. }
  1286. gres_data->topo_gres_bitmap[i] = bit_alloc(gres_cnt);
  1287. if (gres_data->topo_gres_bitmap[i] == NULL)
  1288. fatal("bit_alloc: malloc failure");
  1289. for (j=0; j<gres_slurmd_conf->count; j++) {
  1290. bit_set(gres_data->topo_gres_bitmap[i],
  1291. gres_inx++);
  1292. }
  1293. i++;
  1294. }
  1295. list_iterator_destroy(iter);
  1296. }
  1297. if ((orig_config == NULL) || (orig_config[0] == '\0'))
  1298. gres_data->gres_cnt_config = 0;
  1299. else if (gres_data->gres_cnt_config == NO_VAL) {
  1300. /* This should have been filled in by _node_config_init() */
  1301. gres_data->gres_cnt_config =
  1302. _get_gres_cnt(orig_config, context_ptr->gres_name,
  1303. context_ptr->gres_name_colon,
  1304. context_ptr->gres_name_colon_len);
  1305. }
  1306. if ((gres_data->gres_cnt_config == 0) || (fast_schedule > 0))
  1307. gres_data->gres_cnt_avail = gres_data->gres_cnt_config;
  1308. else if (gres_data->gres_cnt_found != NO_VAL)
  1309. gres_data->gres_cnt_avail = gres_data->gres_cnt_found;
  1310. else if (gres_data->gres_cnt_avail == NO_VAL)
  1311. gres_data->gres_cnt_avail = 0;
  1312. if (context_ptr->has_file) {
  1313. if (gres_data->gres_bit_alloc == NULL) {
  1314. gres_data->gres_bit_alloc =
  1315. bit_alloc(gres_data->gres_cnt_avail);
  1316. } else if (gres_data->gres_cnt_avail !=
  1317. bit_size(gres_data->gres_bit_alloc)) {
  1318. gres_data->gres_bit_alloc =
  1319. bit_realloc(gres_data->gres_bit_alloc,
  1320. gres_data->gres_cnt_avail);
  1321. }
  1322. if (gres_data->gres_bit_alloc == NULL)
  1323. fatal("bit_alloc: malloc failure");
  1324. }
  1325. if ((fast_schedule < 2) &&
  1326. (gres_data->gres_cnt_found < gres_data->gres_cnt_config)) {
  1327. if (reason_down && (*reason_down == NULL)) {
  1328. xstrfmtcat(*reason_down, "%s count too low (%u < %u)",
  1329. context_ptr->gres_type,
  1330. gres_data->gres_cnt_found,
  1331. gres_data->gres_cnt_config);
  1332. }
  1333. rc = EINVAL;
  1334. } else if ((fast_schedule == 2) && gres_data->topo_cnt &&
  1335. (gres_data->gres_cnt_found != gres_data->gres_cnt_config)) {
  1336. error("%s on node %s configured for %u resources but %u found,"
  1337. " ignoring topology support",
  1338. context_ptr->gres_type, node_name,
  1339. gres_data->gres_cnt_config, gres_data->gres_cnt_found);
  1340. if (gres_data->topo_cpus_bitmap) {
  1341. for (i=0; i<gres_data->topo_cnt; i++) {
  1342. FREE_NULL_BITMAP(gres_data->topo_cpus_bitmap[i]);
  1343. FREE_NULL_BITMAP(gres_data->topo_gres_bitmap[i]);
  1344. }
  1345. xfree(gres_data->topo_cpus_bitmap);
  1346. xfree(gres_data->topo_gres_bitmap);
  1347. xfree(gres_data->topo_gres_cnt_alloc);
  1348. xfree(gres_data->topo_gres_cnt_avail);
  1349. }
  1350. gres_data->topo_cnt = 0;
  1351. } else if ((fast_schedule == 0) &&
  1352. (gres_data->gres_cnt_found > gres_data->gres_cnt_config)) {
  1353. /* need to rebuild new_config */
  1354. _set_gres_cnt(orig_config, new_config,
  1355. gres_data->gres_cnt_found,
  1356. context_ptr->gres_name,
  1357. context_ptr->gres_name_colon,
  1358. context_ptr->gres_name_colon_len);
  1359. }
  1360. return rc;
  1361. }
  1362. /*
  1363. * Validate a node's configuration and put a gres record onto a list
  1364. * Called immediately after gres_plugin_node_config_unpack().
  1365. * IN node_name - name of the node for which the gres information applies
  1366. * IN orig_config - Gres information supplied from slurm.conf
  1367. * IN/OUT new_config - Updated gres info from slurm.conf if FastSchedule=0
  1368. * IN/OUT gres_list - List of Gres records for this node to track usage
  1369. * IN fast_schedule - 0: Validate and use actual hardware configuration
  1370. * 1: Validate hardware config, but use slurm.conf config
  1371. * 2: Don't validate hardware, use slurm.conf configuration
  1372. * OUT reason_down - set to an explanation of failure, if any, don't set if NULL
  1373. */
  1374. extern int gres_plugin_node_config_validate(char *node_name,
  1375. char *orig_config,
  1376. char **new_config,
  1377. List *gres_list,
  1378. uint16_t fast_schedule,
  1379. char **reason_down)
  1380. {
  1381. int i, rc, rc2;
  1382. ListIterator gres_iter;
  1383. gres_state_t *gres_ptr;
  1384. rc = gres_plugin_init();
  1385. slurm_mutex_lock(&gres_context_lock);
  1386. if ((gres_context_cnt > 0) && (*gres_list == NULL)) {
  1387. *gres_list = list_create(_gres_node_list_delete);
  1388. if (*gres_list == NULL)
  1389. fatal("list_create malloc failure");
  1390. }
  1391. for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
  1392. /* Find or create gres_state entry on the list */
  1393. gres_iter = list_iterator_create(*gres_list);
  1394. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  1395. if (gres_ptr->plugin_id == gres_context[i].plugin_id)
  1396. break;
  1397. }
  1398. list_iterator_destroy(gres_iter);
  1399. if (gres_ptr == NULL) {
  1400. gres_ptr = xmalloc(sizeof(gres_state_t));
  1401. gres_ptr->plugin_id = gres_context[i].plugin_id;
  1402. list_append(*gres_list, gres_ptr);
  1403. }
  1404. rc2 = _node_config_validate(node_name, orig_config, new_config,
  1405. gres_ptr, fast_schedule,
  1406. reason_down, &gres_context[i]);
  1407. rc = MAX(rc, rc2);
  1408. }
  1409. slurm_mutex_unlock(&gres_context_lock);
  1410. return rc;
  1411. }
  1412. static int _node_reconfig(char *node_name, char *orig_config, char **new_config,
  1413. gres_state_t *gres_ptr, uint16_t fast_schedule,
  1414. slurm_gres_context_t *context_ptr)
  1415. {
  1416. int rc = SLURM_SUCCESS;
  1417. gres_node_state_t *gres_data;
  1418. xassert(gres_ptr);
  1419. if (gres_ptr->gres_data == NULL)
  1420. gres_ptr->gres_data = _build_gres_node_state();
  1421. gres_data = gres_ptr->gres_data;
  1422. gres_data->gres_cnt_config = _get_gres_cnt(orig_config,
  1423. context_ptr->gres_name,
  1424. context_ptr->gres_name_colon,
  1425. context_ptr->
  1426. gres_name_colon_len);
  1427. if ((gres_data->gres_cnt_config == 0) || (fast_schedule > 0))
  1428. gres_data->gres_cnt_avail = gres_data->gres_cnt_config;
  1429. else if (gres_data->gres_cnt_found != NO_VAL)
  1430. gres_data->gres_cnt_avail = gres_data->gres_cnt_found;
  1431. else if (gres_data->gres_cnt_avail == NO_VAL)
  1432. gres_data->gres_cnt_avail = 0;
  1433. if (context_ptr->has_file) {
  1434. if (gres_data->gres_bit_alloc == NULL) {
  1435. gres_data->gres_bit_alloc =
  1436. bit_alloc(gres_data->gres_cnt_avail);
  1437. } else if (gres_data->gres_cnt_avail !=
  1438. bit_size(gres_data->gres_bit_alloc)) {
  1439. gres_data->gres_bit_alloc =
  1440. bit_realloc(gres_data->gres_bit_alloc,
  1441. gres_data->gres_cnt_avail);
  1442. }
  1443. if (gres_data->gres_bit_alloc == NULL)
  1444. fatal("bit_alloc: malloc failure");
  1445. }
  1446. if ((fast_schedule < 2) &&
  1447. (gres_data->gres_cnt_found != NO_VAL) &&
  1448. (gres_data->gres_cnt_found < gres_data->gres_cnt_config)) {
  1449. /* Do not set node DOWN, but give the node
  1450. * a chance to register with more resources */
  1451. gres_data->gres_cnt_found = NO_VAL;
  1452. } else if ((fast_schedule == 0) &&
  1453. (gres_data->gres_cnt_found != NO_VAL) &&
  1454. (gres_data->gres_cnt_found > gres_data->gres_cnt_config)) {
  1455. _set_gres_cnt(orig_config, new_config,
  1456. gres_data->gres_cnt_found,
  1457. context_ptr->gres_name,
  1458. context_ptr->gres_name_colon,
  1459. context_ptr->gres_name_colon_len);
  1460. }
  1461. return rc;
  1462. }
  1463. /*
  1464. * Note that a node's configuration has been modified (e.g. "scontol update ..")
  1465. * IN node_name - name of the node for which the gres information applies
  1466. * IN orig_config - Gres information supplied from slurm.conf
  1467. * IN/OUT new_config - Updated gres info from slurm.conf if FastSchedule=0
  1468. * IN/OUT gres_list - List of Gres records for this node to track usage
  1469. * IN fast_schedule - 0: Validate and use actual hardware configuration
  1470. * 1: Validate hardware config, but use slurm.conf config
  1471. * 2: Don't validate hardware, use slurm.conf configuration
  1472. */
  1473. extern int gres_plugin_node_reconfig(char *node_name,
  1474. char *orig_config,
  1475. char **new_config,
  1476. List *gres_list,
  1477. uint16_t fast_schedule)
  1478. {
  1479. int i, rc, rc2;
  1480. ListIterator gres_iter;
  1481. gres_state_t *gres_ptr;
  1482. rc = gres_plugin_init();
  1483. slurm_mutex_lock(&gres_context_lock);
  1484. if ((gres_context_cnt > 0) && (*gres_list == NULL)) {
  1485. *gres_list = list_create(_gres_node_list_delete);
  1486. if (*gres_list == NULL)
  1487. fatal("list_create malloc failure");
  1488. }
  1489. for (i=0; ((i < gres_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
  1490. /* Find gres_state entry on the list */
  1491. gres_iter = list_iterator_create(*gres_list);
  1492. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  1493. if (gres_ptr->plugin_id == gres_context[i].plugin_id)
  1494. break;
  1495. }
  1496. list_iterator_destroy(gres_iter);
  1497. if (gres_ptr == NULL)
  1498. continue;
  1499. rc2 = _node_reconfig(node_name, orig_config, new_config,
  1500. gres_ptr, fast_schedule, &gres_context[i]);
  1501. rc = MAX(rc, rc2);
  1502. }
  1503. slurm_mutex_unlock(&gres_context_lock);
  1504. return rc;
  1505. }
  1506. /*
  1507. * Pack a node's current gres status, called from slurmctld for save/restore
  1508. * IN gres_list - generated by gres_plugin_node_config_validate()
  1509. * IN/OUT buffer - location to write state to
  1510. * IN node_name - name of the node for which the gres information applies
  1511. */
  1512. extern int gres_plugin_node_state_pack(List gres_list, Buf buffer,
  1513. char *node_name)
  1514. {
  1515. int rc = SLURM_SUCCESS;
  1516. uint32_t top_offset, tail_offset;
  1517. uint32_t magic = GRES_MAGIC;
  1518. uint16_t rec_cnt = 0;
  1519. uint8_t has_bitmap;
  1520. ListIterator gres_iter;
  1521. gres_state_t *gres_ptr;
  1522. gres_node_state_t *gres_node_ptr;
  1523. if (gres_list == NULL) {
  1524. pack16(rec_cnt, buffer);
  1525. return rc;
  1526. }
  1527. top_offset = get_buf_offset(buffer);
  1528. pack16(rec_cnt, buffer); /* placeholder if data */
  1529. if (gres_list == NULL)
  1530. return rc;
  1531. (void) gres_plugin_init();
  1532. slurm_mutex_lock(&gres_context_lock);
  1533. gres_iter = list_iterator_create(gres_list);
  1534. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  1535. gres_node_ptr = (gres_node_state_t *) gres_ptr->gres_data;
  1536. pack32(magic, buffer);
  1537. pack32(gres_ptr->plugin_id, buffer);
  1538. pack32(gres_node_ptr->gres_cnt_avail, buffer);
  1539. /* Just note if gres_bit_alloc exists.
  1540. * Rebuild it based upon the state of recovered jobs */
  1541. if (gres_node_ptr->gres_bit_alloc)
  1542. has_bitmap = 1;
  1543. else
  1544. has_bitmap = 0;
  1545. pack8(has_bitmap, buffer);
  1546. rec_cnt++;
  1547. break;
  1548. }
  1549. list_iterator_destroy(gres_iter);
  1550. slurm_mutex_unlock(&gres_context_lock);
  1551. tail_offset = get_buf_offset(buffer);
  1552. set_buf_offset(buffer, top_offset);
  1553. pack16(rec_cnt, buffer);
  1554. set_buf_offset(buffer, tail_offset);
  1555. return rc;
  1556. }
  1557. /*
  1558. * Unpack a node's current gres status, called from slurmctld for save/restore
  1559. * OUT gres_list - restored state stored by gres_plugin_node_state_pack()
  1560. * IN/OUT buffer - location to read state from
  1561. * IN node_name - name of the node for which the gres information applies
  1562. */
  1563. extern int gres_plugin_node_state_unpack(List *gres_list, Buf buffer,
  1564. char *node_name,
  1565. uint16_t protocol_version)
  1566. {
  1567. int i, rc;
  1568. uint32_t gres_cnt_avail, magic, plugin_id;
  1569. uint16_t rec_cnt;
  1570. uint8_t has_bitmap;
  1571. gres_state_t *gres_ptr;
  1572. gres_node_state_t *gres_node_ptr;
  1573. safe_unpack16(&rec_cnt, buffer);
  1574. if (rec_cnt == 0)
  1575. return SLURM_SUCCESS;
  1576. rc = gres_plugin_init();
  1577. slurm_mutex_lock(&gres_context_lock);
  1578. if ((gres_context_cnt > 0) && (*gres_list == NULL)) {
  1579. *gres_list = list_create(_gres_node_list_delete);
  1580. if (*gres_list == NULL)
  1581. fatal("list_create malloc failure");
  1582. }
  1583. while ((rc == SLURM_SUCCESS) && (rec_cnt)) {
  1584. if ((buffer == NULL) || (remaining_buf(buffer) == 0))
  1585. break;
  1586. rec_cnt--;
  1587. if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  1588. safe_unpack32(&magic, buffer);
  1589. if (magic != GRES_MAGIC)
  1590. goto unpack_error;
  1591. safe_unpack32(&plugin_id, buffer);
  1592. safe_unpack32(&gres_cnt_avail, buffer);
  1593. safe_unpack8(&has_bitmap, buffer);
  1594. } else {
  1595. error("gres_plugin_node_state_unpack: protocol_version"
  1596. " %hu not supported", protocol_version);
  1597. goto unpack_error;
  1598. }
  1599. for (i=0; i<gres_context_cnt; i++) {
  1600. if (gres_context[i].plugin_id == plugin_id)
  1601. break;
  1602. }
  1603. if (i >= gres_context_cnt) {
  1604. error("gres_plugin_node_state_unpack: no plugin "
  1605. "configured to unpack data type %u from node %s",
  1606. plugin_id, node_name);
  1607. /* A likely sign that GresPlugins has changed.
  1608. * Not a fatal error, skip over the data. */
  1609. continue;
  1610. }
  1611. gres_node_ptr = _build_gres_node_state();
  1612. gres_node_ptr->gres_cnt_avail = gres_cnt_avail;
  1613. if (has_bitmap) {
  1614. gres_node_ptr->gres_bit_alloc =
  1615. bit_alloc(gres_cnt_avail);
  1616. if (gres_node_ptr->gres_bit_alloc == NULL)
  1617. fatal("bit_alloc: malloc failure");
  1618. }
  1619. gres_ptr = xmalloc(sizeof(gres_state_t));
  1620. gres_ptr->plugin_id = gres_context[i].plugin_id;
  1621. gres_ptr->gres_data = gres_node_ptr;
  1622. list_append(*gres_list, gres_ptr);
  1623. }
  1624. slurm_mutex_unlock(&gres_context_lock);
  1625. return rc;
  1626. unpack_error:
  1627. error("gres_plugin_node_state_unpack: unpack error from node %s",
  1628. node_name);
  1629. slurm_mutex_unlock(&gres_context_lock);
  1630. return SLURM_ERROR;
  1631. }
  1632. static void *_node_state_dup(void *gres_data)
  1633. {
  1634. int i;
  1635. gres_node_state_t *gres_ptr = (gres_node_state_t *) gres_data;
  1636. gres_node_state_t *new_gres;
  1637. if (gres_ptr == NULL)
  1638. return NULL;
  1639. new_gres = xmalloc(sizeof(gres_node_state_t));
  1640. new_gres->gres_cnt_found = gres_ptr->gres_cnt_found;
  1641. new_gres->gres_cnt_config = gres_ptr->gres_cnt_config;
  1642. new_gres->gres_cnt_avail = gres_ptr->gres_cnt_avail;
  1643. new_gres->gres_cnt_alloc = gres_ptr->gres_cnt_alloc;
  1644. if (gres_ptr->gres_bit_alloc)
  1645. new_gres->gres_bit_alloc = bit_copy(gres_ptr->gres_bit_alloc);
  1646. if (gres_ptr->topo_cnt == 0)
  1647. return new_gres;
  1648. new_gres->topo_cnt = gres_ptr->topo_cnt;
  1649. new_gres->topo_cpus_bitmap = xmalloc(gres_ptr->topo_cnt *
  1650. sizeof(bitstr_t *));
  1651. new_gres->topo_gres_bitmap = xmalloc(gres_ptr->topo_cnt *
  1652. sizeof(bitstr_t *));
  1653. new_gres->topo_gres_cnt_alloc = xmalloc(gres_ptr->topo_cnt *
  1654. sizeof(uint32_t));
  1655. new_gres->topo_gres_cnt_avail = xmalloc(gres_ptr->topo_cnt *
  1656. sizeof(uint32_t));
  1657. for (i=0; i<gres_ptr->topo_cnt; i++) {
  1658. new_gres->topo_cpus_bitmap[i] =
  1659. bit_copy(gres_ptr->topo_cpus_bitmap[i]);
  1660. new_gres->topo_gres_bitmap[i] =
  1661. bit_copy(gres_ptr->topo_gres_bitmap[i]);
  1662. if ((new_gres->topo_cpus_bitmap[i] == NULL) ||
  1663. (new_gres->topo_gres_bitmap[i] == NULL))
  1664. fatal("bit_copy: malloc failure");
  1665. new_gres->topo_gres_cnt_alloc[i] =
  1666. gres_ptr->topo_gres_cnt_alloc[i];
  1667. new_gres->topo_gres_cnt_avail[i] =
  1668. gres_ptr->topo_gres_cnt_avail[i];
  1669. }
  1670. return new_gres;
  1671. }
  1672. /*
  1673. * Duplicate a node gres status (used for will-run logic)
  1674. * IN gres_list - node gres state information
  1675. * RET a copy of gres_list or NULL on failure
  1676. */
  1677. extern List gres_plugin_node_state_dup(List gres_list)
  1678. {
  1679. int i;
  1680. List new_list = NULL;
  1681. ListIterator gres_iter;
  1682. gres_state_t *gres_ptr, *new_gres;
  1683. void *gres_data;
  1684. if (gres_list == NULL)
  1685. return new_list;
  1686. (void) gres_plugin_init();
  1687. slurm_mutex_lock(&gres_context_lock);
  1688. if ((gres_context_cnt > 0)) {
  1689. new_list = list_create(_gres_node_list_delete);
  1690. if (new_list == NULL)
  1691. fatal("list_create malloc failure");
  1692. }
  1693. gres_iter = list_iterator_create(gres_list);
  1694. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  1695. for (i=0; i<gres_context_cnt; i++) {
  1696. if (gres_ptr->plugin_id != gres_context[i].plugin_id)
  1697. continue;
  1698. gres_data = _node_state_dup(gres_ptr->gres_data);
  1699. if (gres_data) {
  1700. new_gres = xmalloc(sizeof(gres_state_t));
  1701. new_gres->plugin_id = gres_ptr->plugin_id;
  1702. new_gres->gres_data = gres_data;
  1703. list_append(new_list, new_gres);
  1704. }
  1705. break;
  1706. }
  1707. if (i >= gres_context_cnt) {
  1708. error("Could not find plugin id %u to dup node record",
  1709. gres_ptr->plugin_id);
  1710. }
  1711. }
  1712. list_iterator_destroy(gres_iter);
  1713. slurm_mutex_unlock(&gres_context_lock);
  1714. return new_list;
  1715. }
  1716. static void _node_state_dealloc(gres_state_t *gres_ptr)
  1717. {
  1718. int i;
  1719. gres_node_state_t *gres_node_ptr;
  1720. char *gres_name = NULL;
  1721. gres_node_ptr = (gres_node_state_t *) gres_ptr->gres_data;
  1722. gres_node_ptr->gres_cnt_alloc = 0;
  1723. if (gres_node_ptr->gres_bit_alloc) {
  1724. int i = bit_size(gres_node_ptr->gres_bit_alloc) - 1;
  1725. if (i >= 0)
  1726. bit_nclear(gres_node_ptr->gres_bit_alloc, 0, i);
  1727. }
  1728. if (gres_node_ptr->topo_cnt && !gres_node_ptr->topo_gres_cnt_alloc) {
  1729. for (i=0; i<gres_context_cnt; i++) {
  1730. if (gres_ptr->plugin_id == gres_context[i].plugin_id) {
  1731. gres_name = gres_context[i].gres_name;
  1732. break;
  1733. }
  1734. }
  1735. error("gres_plugin_node_state_dealloc_all: gres/%s topo_cnt!=0 "
  1736. "and topo_gres_cnt_alloc is NULL", gres_name);
  1737. } else {
  1738. for (i=0; i<gres_node_ptr->topo_cnt; i++) {
  1739. gres_node_ptr->topo_gres_cnt_alloc[i] = 0;
  1740. }
  1741. }
  1742. }
  1743. /*
  1744. * Deallocate all resources on this node previous allocated to any jobs.
  1745. * This function isused to synchronize state after slurmctld restarts or
  1746. * is reconfigured.
  1747. * IN gres_list - node gres state information
  1748. */
  1749. extern void gres_plugin_node_state_dealloc_all(List gres_list)
  1750. {
  1751. ListIterator gres_iter;
  1752. gres_state_t *gres_ptr;
  1753. if (gres_list == NULL)
  1754. return;
  1755. (void) gres_plugin_init();
  1756. slurm_mutex_lock(&gres_context_lock);
  1757. gres_iter = list_iterator_create(gres_list);
  1758. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  1759. _node_state_dealloc(gres_ptr);
  1760. }
  1761. list_iterator_destroy(gres_iter);
  1762. slurm_mutex_unlock(&gres_context_lock);
  1763. }
  1764. static void _node_state_log(void *gres_data, char *node_name, char *gres_name)
  1765. {
  1766. gres_node_state_t *gres_node_ptr;
  1767. int i;
  1768. char tmp_str[128];
  1769. xassert(gres_data);
  1770. gres_node_ptr = (gres_node_state_t *) gres_data;
  1771. info("gres/%s: state for %s", gres_name, node_name);
  1772. info(" gres_cnt found:%u configured:%u avail:%u alloc:%u",
  1773. gres_node_ptr->gres_cnt_found, gres_node_ptr->gres_cnt_config,
  1774. gres_node_ptr->gres_cnt_avail, gres_node_ptr->gres_cnt_alloc);
  1775. if (gres_node_ptr->gres_bit_alloc) {
  1776. bit_fmt(tmp_str, sizeof(tmp_str), gres_node_ptr->gres_bit_alloc);
  1777. info(" gres_bit_alloc:%s", tmp_str);
  1778. } else {
  1779. info(" gres_bit_alloc:NULL");
  1780. }
  1781. for (i=0; i<gres_node_ptr->topo_cnt; i++) {
  1782. if (gres_node_ptr->topo_cpus_bitmap[i]) {
  1783. bit_fmt(tmp_str, sizeof(tmp_str),
  1784. gres_node_ptr->topo_cpus_bitmap[i]);
  1785. info(" topo_cpus_bitmap[%d]:%s", i, tmp_str);
  1786. } else
  1787. info(" topo_cpus_bitmap[%d]:NULL", i);
  1788. if (gres_node_ptr->topo_cpus_bitmap[i]) {
  1789. bit_fmt(tmp_str, sizeof(tmp_str),
  1790. gres_node_ptr->topo_gres_bitmap[i]);
  1791. info(" topo_gres_bitmap[%d]:%s", i, tmp_str);
  1792. } else
  1793. info(" topo_gres_bitmap[%d]:NULL", i);
  1794. info(" topo_gres_cnt_alloc[%d]:%u",i,
  1795. gres_node_ptr->topo_gres_cnt_alloc[i]);
  1796. info(" topo_gres_cnt_avail[%d]:%u",i,
  1797. gres_node_ptr->topo_gres_cnt_avail[i]);
  1798. }
  1799. }
  1800. /*
  1801. * Log a node's current gres state
  1802. * IN gres_list - generated by gres_plugin_node_config_validate()
  1803. * IN node_name - name of the node for which the gres information applies
  1804. */
  1805. extern void gres_plugin_node_state_log(List gres_list, char *node_name)
  1806. {
  1807. int i;
  1808. ListIterator gres_iter;
  1809. gres_state_t *gres_ptr;
  1810. if (!gres_debug || (gres_list == NULL))
  1811. return;
  1812. (void) gres_plugin_init();
  1813. slurm_mutex_lock(&gres_context_lock);
  1814. gres_iter = list_iterator_create(gres_list);
  1815. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  1816. for (i=0; i<gres_context_cnt; i++) {
  1817. if (gres_ptr->plugin_id !=
  1818. gres_context[i].plugin_id)
  1819. continue;
  1820. _node_state_log(gres_ptr->gres_data, node_name,
  1821. gres_context[i].gres_name);
  1822. break;
  1823. }
  1824. }
  1825. list_iterator_destroy(gres_iter);
  1826. slurm_mutex_unlock(&gres_context_lock);
  1827. }
  1828. static void _job_state_delete(void *gres_data)
  1829. {
  1830. int i;
  1831. gres_job_state_t *gres_ptr = (gres_job_state_t *) gres_data;
  1832. if (gres_ptr == NULL)
  1833. return;
  1834. for (i=0; i<gres_ptr->node_cnt; i++) {
  1835. if (gres_ptr->gres_bit_alloc)
  1836. FREE_NULL_BITMAP(gres_ptr->gres_bit_alloc[i]);
  1837. if (gres_ptr->gres_bit_step_alloc)
  1838. FREE_NULL_BITMAP(gres_ptr->gres_bit_step_alloc[i]);
  1839. }
  1840. xfree(gres_ptr->gres_bit_alloc);
  1841. xfree(gres_ptr->gres_bit_step_alloc);
  1842. xfree(gres_ptr->gres_cnt_step_alloc);
  1843. xfree(gres_ptr);
  1844. }
  1845. static void _gres_job_list_delete(void *list_element)
  1846. {
  1847. gres_state_t *gres_ptr;
  1848. if (gres_plugin_init() != SLURM_SUCCESS)
  1849. return;
  1850. gres_ptr = (gres_state_t *) list_element;
  1851. slurm_mutex_lock(&gres_context_lock);
  1852. _job_state_delete(gres_ptr->gres_data);
  1853. xfree(gres_ptr);
  1854. slurm_mutex_unlock(&gres_context_lock);
  1855. }
  1856. static int _job_config_validate(char *config, uint32_t *gres_cnt,
  1857. slurm_gres_context_t *context_ptr)
  1858. {
  1859. char *last_num = NULL;
  1860. int cnt;
  1861. if (!strcmp(config, context_ptr->gres_name)) {
  1862. cnt = 1;
  1863. } else if (!strncmp(config, context_ptr->gres_name_colon,
  1864. context_ptr->gres_name_colon_len)) {
  1865. config += context_ptr->gres_name_colon_len;
  1866. cnt = strtol(config, &last_num, 10);
  1867. if (last_num[0] == '\0')
  1868. ;
  1869. else if ((last_num[0] == 'k') || (last_num[0] == 'K'))
  1870. cnt *= 1024;
  1871. else if ((last_num[0] == 'm') || (last_num[0] == 'M'))
  1872. cnt *= (1024 * 1024);
  1873. else if ((last_num[0] == 'g') || (last_num[0] == 'G'))
  1874. cnt *= (1024 * 1024 * 1024);
  1875. else
  1876. return SLURM_ERROR;
  1877. if (cnt < 0)
  1878. return SLURM_ERROR;
  1879. } else
  1880. return SLURM_ERROR;
  1881. *gres_cnt = (uint32_t) cnt;
  1882. return SLURM_SUCCESS;
  1883. }
  1884. static int _job_state_validate(char *config, void **gres_data,
  1885. slurm_gres_context_t *context_ptr)
  1886. {
  1887. int rc;
  1888. uint32_t gres_cnt;
  1889. rc = _job_config_validate(config, &gres_cnt, context_ptr);
  1890. if ((rc == SLURM_SUCCESS) && (gres_cnt > 0)) {
  1891. gres_job_state_t *gres_ptr;
  1892. gres_ptr = xmalloc(sizeof(gres_job_state_t));
  1893. gres_ptr->gres_cnt_alloc = gres_cnt;
  1894. *gres_data = gres_ptr;
  1895. } else
  1896. *gres_data = NULL;
  1897. return rc;
  1898. }
  1899. /*
  1900. * Given a job's requested gres configuration, validate it and build a gres list
  1901. * IN req_config - job request's gres input string
  1902. * OUT gres_list - List of Gres records for this job to track usage
  1903. * RET SLURM_SUCCESS or ESLURM_INVALID_GRES
  1904. */
  1905. extern int gres_plugin_job_state_validate(char *req_config, List *gres_list)
  1906. {
  1907. char *tmp_str, *tok, *last = NULL;
  1908. int i, rc, rc2;
  1909. gres_state_t *gres_ptr;
  1910. void *job_gres_data;
  1911. if ((req_config == NULL) || (req_config[0] == '\0')) {
  1912. *gres_list = NULL;
  1913. return SLURM_SUCCESS;
  1914. }
  1915. if ((rc = gres_plugin_init()) != SLURM_SUCCESS)
  1916. return rc;
  1917. slurm_mutex_lock(&gres_context_lock);
  1918. tmp_str = xstrdup(req_config);
  1919. tok = strtok_r(tmp_str, ",", &last);
  1920. while (tok && (rc == SLURM_SUCCESS)) {
  1921. rc2 = SLURM_ERROR;
  1922. for (i=0; i<gres_context_cnt; i++) {
  1923. rc2 = _job_state_validate(tok, &job_gres_data,
  1924. &gres_context[i]);
  1925. if ((rc2 != SLURM_SUCCESS) || (job_gres_data == NULL))
  1926. continue;
  1927. if (*gres_list == NULL) {
  1928. *gres_list = list_create(_gres_job_list_delete);
  1929. if (*gres_list == NULL)
  1930. fatal("list_create malloc failure");
  1931. }
  1932. gres_ptr = xmalloc(sizeof(gres_state_t));
  1933. gres_ptr->plugin_id = gres_context[i].plugin_id;
  1934. gres_ptr->gres_data = job_gres_data;
  1935. list_append(*gres_list, gres_ptr);
  1936. break; /* processed it */
  1937. }
  1938. if (rc2 != SLURM_SUCCESS) {
  1939. info("Invalid gres job specification %s", tok);
  1940. rc = ESLURM_INVALID_GRES;
  1941. break;
  1942. }
  1943. tok = strtok_r(NULL, ",", &last);
  1944. }
  1945. slurm_mutex_unlock(&gres_context_lock);
  1946. xfree(tmp_str);
  1947. return rc;
  1948. }
  1949. static void *_job_state_dup(void *gres_data)
  1950. {
  1951. int i;
  1952. gres_job_state_t *gres_ptr = (gres_job_state_t *) gres_data;
  1953. gres_job_state_t *new_gres_ptr;
  1954. if (gres_ptr == NULL)
  1955. return NULL;
  1956. new_gres_ptr = xmalloc(sizeof(gres_job_state_t));
  1957. new_gres_ptr->gres_cnt_alloc = gres_ptr->gres_cnt_alloc;
  1958. new_gres_ptr->node_cnt = gres_ptr->node_cnt;
  1959. if (gres_ptr->gres_bit_alloc) {
  1960. new_gres_ptr->gres_bit_alloc = xmalloc(sizeof(bitstr_t *) *
  1961. gres_ptr->node_cnt);
  1962. for (i=0; i<gres_ptr->node_cnt; i++) {
  1963. if (gres_ptr->gres_bit_alloc[i] == NULL)
  1964. continue;
  1965. new_gres_ptr->gres_bit_alloc[i] =
  1966. bit_copy(gres_ptr->gres_bit_alloc[i]);
  1967. }
  1968. }
  1969. return new_gres_ptr;
  1970. }
  1971. static void *_job_state_dup2(void *gres_data, int node_index)
  1972. {
  1973. gres_job_state_t *gres_ptr = (gres_job_state_t *) gres_data;
  1974. gres_job_state_t *new_gres_ptr;
  1975. if (gres_ptr == NULL)
  1976. return NULL;
  1977. new_gres_ptr = xmalloc(sizeof(gres_job_state_t));
  1978. new_gres_ptr->gres_cnt_alloc = gres_ptr->gres_cnt_alloc;
  1979. new_gres_ptr->node_cnt = 1;
  1980. if (gres_ptr->gres_bit_alloc && gres_ptr->gres_bit_alloc[node_index]) {
  1981. new_gres_ptr->gres_bit_alloc = xmalloc(sizeof(bitstr_t *));
  1982. new_gres_ptr->gres_bit_alloc[0] =
  1983. bit_copy(gres_ptr->gres_bit_alloc[node_index]);
  1984. }
  1985. return new_gres_ptr;
  1986. }
  1987. /*
  1988. * Create a (partial) copy of a job's gres state for job binding
  1989. * IN gres_list - List of Gres records for this job to track usage
  1990. * RET The copy or NULL on failure
  1991. * NOTE: Only gres_cnt_alloc, node_cnt and gres_bit_alloc are copied
  1992. * Job step details are NOT copied.
  1993. */
  1994. List gres_plugin_job_state_dup(List gres_list)
  1995. {
  1996. return gres_plugin_job_state_extract(gres_list, -1);
  1997. }
  1998. /*
  1999. * Create a (partial) copy of a job's gres state for a particular node index
  2000. * IN gres_list - List of Gres records for this job to track usage
  2001. * IN node_index - zero-origin index to the node
  2002. * RET The copy or NULL on failure
  2003. */
  2004. List gres_plugin_job_state_extract(List gres_list, int node_index)
  2005. {
  2006. ListIterator gres_iter;
  2007. gres_state_t *gres_ptr, *new_gres_state;
  2008. List new_gres_list = NULL;
  2009. void *new_gres_data;
  2010. if (gres_list == NULL)
  2011. return new_gres_list;
  2012. (void) gres_plugin_init();
  2013. slurm_mutex_lock(&gres_context_lock);
  2014. gres_iter = list_iterator_create(gres_list);
  2015. if (gres_iter == NULL)
  2016. fatal("list_iterator_create: malloc failure");
  2017. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  2018. if (node_index == -1)
  2019. new_gres_data = _job_state_dup(gres_ptr->gres_data);
  2020. else {
  2021. new_gres_data = _job_state_dup2(gres_ptr->gres_data,
  2022. node_index);
  2023. }
  2024. if (new_gres_data == NULL)
  2025. break;
  2026. if (new_gres_list == NULL) {
  2027. new_gres_list = list_create(_gres_job_list_delete);
  2028. if (new_gres_list == NULL)
  2029. fatal("list_create: malloc failure");
  2030. }
  2031. new_gres_state = xmalloc(sizeof(gres_state_t));
  2032. new_gres_state->plugin_id = gres_ptr->plugin_id;
  2033. new_gres_state->gres_data = new_gres_data;
  2034. list_append(new_gres_list, new_gres_state);
  2035. }
  2036. list_iterator_destroy(gres_iter);
  2037. slurm_mutex_unlock(&gres_context_lock);
  2038. return new_gres_list;
  2039. }
  2040. /*
  2041. * Pack a job's current gres status, called from slurmctld for save/restore
  2042. * IN gres_list - generated by gres_plugin_job_config_validate()
  2043. * IN/OUT buffer - location to write state to
  2044. * IN job_id - job's ID
  2045. * IN details - if set then pack job step allocation details (only needed to
  2046. * save/restore job state, not needed in job credential for
  2047. * slurmd task binding)
  2048. *
  2049. * NOTE: A job's allocation to steps is not recorded here, but recovered with
  2050. * the job step state information upon slurmctld restart.
  2051. */
  2052. extern int gres_plugin_job_state_pack(List gres_list, Buf buffer,
  2053. uint32_t job_id, bool details,
  2054. uint16_t protocol_version)
  2055. {
  2056. int i, rc = SLURM_SUCCESS;
  2057. uint32_t top_offset, tail_offset;
  2058. uint32_t magic = GRES_MAGIC;
  2059. uint16_t rec_cnt = 0;
  2060. ListIterator gres_iter;
  2061. gres_state_t *gres_ptr;
  2062. gres_job_state_t *gres_job_ptr;
  2063. top_offset = get_buf_offset(buffer);
  2064. pack16(rec_cnt, buffer); /* placeholder if data */
  2065. if (gres_list == NULL)
  2066. return rc;
  2067. (void) gres_plugin_init();
  2068. slurm_mutex_lock(&gres_context_lock);
  2069. gres_iter = list_iterator_create(gres_list);
  2070. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  2071. gres_job_ptr = (gres_job_state_t *) gres_ptr->gres_data;
  2072. if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  2073. pack32(magic, buffer);
  2074. pack32(gres_ptr->plugin_id, buffer);
  2075. pack32(gres_job_ptr->gres_cnt_alloc, buffer);
  2076. pack32(gres_job_ptr->node_cnt, buffer);
  2077. if (gres_job_ptr->gres_bit_alloc) {
  2078. pack8((uint8_t) 1, buffer);
  2079. for (i=0; i<gres_job_ptr->node_cnt; i++) {
  2080. pack_bit_str(gres_job_ptr->
  2081. gres_bit_alloc[i],
  2082. buffer);
  2083. }
  2084. } else {
  2085. pack8((uint8_t) 0, buffer);
  2086. }
  2087. if (details && gres_job_ptr->gres_bit_step_alloc) {
  2088. pack8((uint8_t) 1, buffer);
  2089. for (i=0; i<gres_job_ptr->node_cnt; i++) {
  2090. pack_bit_str(gres_job_ptr->
  2091. gres_bit_step_alloc[i],
  2092. buffer);
  2093. }
  2094. } else {
  2095. pack8((uint8_t) 0, buffer);
  2096. }
  2097. if (details && gres_job_ptr->gres_cnt_step_alloc) {
  2098. pack8((uint8_t) 1, buffer);
  2099. for (i=0; i<gres_job_ptr->node_cnt; i++) {
  2100. pack32(gres_job_ptr->
  2101. gres_cnt_step_alloc[i],
  2102. buffer);
  2103. }
  2104. } else {
  2105. pack8((uint8_t) 0, buffer);
  2106. }
  2107. rec_cnt++;
  2108. } else {
  2109. error("gres_plugin_node_state_pack: protocol_version"
  2110. " %hu not supported", protocol_version);
  2111. break;
  2112. }
  2113. }
  2114. list_iterator_destroy(gres_iter);
  2115. slurm_mutex_unlock(&gres_context_lock);
  2116. tail_offset = get_buf_offset(buffer);
  2117. set_buf_offset(buffer, top_offset);
  2118. pack16(rec_cnt, buffer);
  2119. set_buf_offset(buffer, tail_offset);
  2120. return rc;
  2121. }
  2122. /*
  2123. * Unpack a job's current gres status, called from slurmctld for save/restore
  2124. * OUT gres_list - restored state stored by gres_plugin_job_state_pack()
  2125. * IN/OUT buffer - location to read state from
  2126. * IN job_id - job's ID
  2127. */
  2128. extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer,
  2129. uint32_t job_id,
  2130. uint16_t protocol_version)
  2131. {
  2132. int i = 0, rc;
  2133. uint32_t magic, plugin_id;
  2134. uint16_t rec_cnt;
  2135. uint8_t has_more;
  2136. gres_state_t *gres_ptr;
  2137. gres_job_state_t *gres_job_ptr = NULL;
  2138. safe_unpack16(&rec_cnt, buffer);
  2139. if (rec_cnt == 0)
  2140. return SLURM_SUCCESS;
  2141. rc = gres_plugin_init();
  2142. slurm_mutex_lock(&gres_context_lock);
  2143. if ((gres_context_cnt > 0) && (*gres_list == NULL)) {
  2144. *gres_list = list_create(_gres_job_list_delete);
  2145. if (*gres_list == NULL)
  2146. fatal("list_create malloc failure");
  2147. }
  2148. while ((rc == SLURM_SUCCESS) && (rec_cnt)) {
  2149. if ((buffer == NULL) || (remaining_buf(buffer) == 0))
  2150. break;
  2151. rec_cnt--;
  2152. if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  2153. safe_unpack32(&magic, buffer);
  2154. if (magic != GRES_MAGIC)
  2155. goto unpack_error;
  2156. safe_unpack32(&plugin_id, buffer);
  2157. gres_job_ptr = xmalloc(sizeof(gres_job_state_t));
  2158. safe_unpack32(&gres_job_ptr->gres_cnt_alloc, buffer);
  2159. safe_unpack32(&gres_job_ptr->node_cnt, buffer);
  2160. safe_unpack8(&has_more, buffer);
  2161. if (has_more) {
  2162. gres_job_ptr->gres_bit_alloc =
  2163. xmalloc(sizeof(bitstr_t *) *
  2164. gres_job_ptr->node_cnt);
  2165. for (i=0; i<gres_job_ptr->node_cnt; i++) {
  2166. unpack_bit_str(&gres_job_ptr->
  2167. gres_bit_alloc[i],
  2168. buffer);
  2169. }
  2170. }
  2171. safe_unpack8(&has_more, buffer);
  2172. if (has_more) {
  2173. gres_job_ptr->gres_bit_step_alloc =
  2174. xmalloc(sizeof(bitstr_t *) *
  2175. gres_job_ptr->node_cnt);
  2176. for (i=0; i<gres_job_ptr->node_cnt; i++) {
  2177. unpack_bit_str(&gres_job_ptr->
  2178. gres_bit_step_alloc[i],
  2179. buffer);
  2180. }
  2181. }
  2182. safe_unpack8(&has_more, buffer);
  2183. if (has_more) {
  2184. gres_job_ptr->gres_cnt_step_alloc =
  2185. xmalloc(sizeof(uint32_t) *
  2186. gres_job_ptr->node_cnt);
  2187. for (i=0; i<gres_job_ptr->node_cnt; i++) {
  2188. safe_unpack32(&gres_job_ptr->
  2189. gres_cnt_step_alloc[i],
  2190. buffer);
  2191. }
  2192. }
  2193. } else {
  2194. error("gres_plugin_job_state_unpack: protocol_version"
  2195. " %hu not supported", protocol_version);
  2196. goto unpack_error;
  2197. }
  2198. for (i=0; i<gres_context_cnt; i++) {
  2199. if (gres_context[i].plugin_id == plugin_id)
  2200. break;
  2201. }
  2202. if (i >= gres_context_cnt) {
  2203. /* A likely sign that GresPlugins has changed.
  2204. * Not a fatal error, skip over the data. */
  2205. error("gres_plugin_job_state_unpack: no plugin "
  2206. "configured to unpack data type %u from job %u",
  2207. plugin_id, job_id);
  2208. _job_state_delete(gres_job_ptr);
  2209. continue;
  2210. }
  2211. gres_ptr = xmalloc(sizeof(gres_state_t));
  2212. gres_ptr->plugin_id = gres_context[i].plugin_id;
  2213. gres_ptr->gres_data = gres_job_ptr;
  2214. gres_job_ptr = NULL; /* nothing left to free on error */
  2215. list_append(*gres_list, gres_ptr);
  2216. }
  2217. slurm_mutex_unlock(&gres_context_lock);
  2218. return rc;
  2219. unpack_error:
  2220. error("gres_plugin_job_state_unpack: unpack error from job %u",
  2221. job_id);
  2222. if (gres_job_ptr)
  2223. _job_state_delete(gres_job_ptr);
  2224. slurm_mutex_unlock(&gres_context_lock);
  2225. return SLURM_ERROR;
  2226. }
  2227. /* If CPU bitmap from slurmd differs in size from that in slurmctld,
  2228. * then modify bitmap from slurmd so we can use bit_and, bit_or, etc. */
  2229. static bitstr_t *_cpu_bitmap_rebuild(bitstr_t *old_cpu_bitmap, int new_size)
  2230. {
  2231. int i, j, old_size, ratio;
  2232. bitstr_t *new_cpu_bitmap;
  2233. new_cpu_bitmap = bit_alloc(new_size);
  2234. if (new_cpu_bitmap == NULL)
  2235. fatal("bit_alloc: malloc failure");
  2236. old_size = bit_size(old_cpu_bitmap);
  2237. if (old_size > new_size) {
  2238. ratio = old_size / new_size;
  2239. for (i=0; i<new_size; i++) {
  2240. for (j=0; j<ratio; j++) {
  2241. if (bit_test(old_cpu_bitmap, i*ratio+j)) {
  2242. bit_set(new_cpu_bitmap, i);
  2243. break;
  2244. }
  2245. }
  2246. }
  2247. } else {
  2248. ratio = new_size / old_size;
  2249. for (i=0; i<old_size; i++) {
  2250. if (!bit_test(old_cpu_bitmap, i))
  2251. continue;
  2252. for (j=0; j<ratio; j++) {
  2253. bit_set(new_cpu_bitmap, i*ratio+j);
  2254. }
  2255. }
  2256. }
  2257. return new_cpu_bitmap;
  2258. }
  2259. static void _validate_gres_node_cpus(gres_node_state_t *node_gres_ptr,
  2260. int cpus_ctld, char *node_name)
  2261. {
  2262. int i, cpus_slurmd;
  2263. bitstr_t *new_cpu_bitmap;
  2264. if (node_gres_ptr->topo_cnt == 0)
  2265. return;
  2266. cpus_slurmd = bit_size(node_gres_ptr->topo_cpus_bitmap[0]);
  2267. if (cpus_slurmd == cpus_ctld)
  2268. return;
  2269. debug("Gres CPU count mismatch on node %s (%d != %d)",
  2270. node_name, cpus_slurmd, cpus_ctld);
  2271. for (i=0; i<node_gres_ptr->topo_cnt; i++) {
  2272. if (i != 0) {
  2273. cpus_slurmd = bit_size(node_gres_ptr->
  2274. topo_cpus_bitmap[i]);
  2275. }
  2276. if (cpus_slurmd == cpus_ctld) /* should never happen here */
  2277. continue;
  2278. new_cpu_bitmap = _cpu_bitmap_rebuild(node_gres_ptr->
  2279. topo_cpus_bitmap[i],
  2280. cpus_ctld);
  2281. FREE_NULL_BITMAP(node_gres_ptr->topo_cpus_bitmap[i]);
  2282. node_gres_ptr->topo_cpus_bitmap[i] = new_cpu_bitmap;
  2283. }
  2284. }
  2285. extern uint32_t _job_test(void *job_gres_data, void *node_gres_data,
  2286. bool use_total_gres, bitstr_t *cpu_bitmap,
  2287. int cpu_start_bit, int cpu_end_bit, bool *topo_set,
  2288. uint32_t job_id, char *node_name, char *gres_name)
  2289. {
  2290. int i, j, cpus_ctld, gres_avail = 0, top_inx;
  2291. gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data;
  2292. gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data;
  2293. uint32_t *cpus_avail = NULL, cpu_cnt = 0;
  2294. bitstr_t *alloc_cpu_bitmap = NULL;
  2295. if (job_gres_ptr->gres_cnt_alloc && node_gres_ptr->topo_cnt &&
  2296. *topo_set) {
  2297. /* Need to determine how many gres available for these
  2298. * specific CPUs */
  2299. if (cpu_bitmap) {
  2300. cpus_ctld = cpu_end_bit - cpu_start_bit + 1;
  2301. if (cpus_ctld < 1) {
  2302. error("gres/%s: job %u cpus on node %s < 1",
  2303. gres_name, job_id, node_name);
  2304. return (uint32_t) 0;
  2305. }
  2306. _validate_gres_node_cpus(node_gres_ptr, cpus_ctld,
  2307. node_name);
  2308. } else {
  2309. cpus_ctld = bit_size(node_gres_ptr->
  2310. topo_cpus_bitmap[0]);
  2311. }
  2312. for (i=0; i<node_gres_ptr->topo_cnt; i++) {
  2313. for (j=0; j<cpus_ctld; j++) {
  2314. if (cpu_bitmap &&
  2315. !bit_test(cpu_bitmap, cpu_start_bit+j))
  2316. continue;
  2317. if (!bit_test(node_gres_ptr->
  2318. topo_cpus_bitmap[i], j))
  2319. continue; /* not avail for this gres */
  2320. gres_avail += node_gres_ptr->
  2321. topo_gres_cnt_avail[i];
  2322. if (!use_total_gres) {
  2323. gres_avail -= node_gres_ptr->
  2324. topo_gres_cnt_alloc[i];
  2325. }
  2326. break;
  2327. }
  2328. }
  2329. if (job_gres_ptr->gres_cnt_alloc > gres_avail)
  2330. return (uint32_t) 0; /* insufficient, gres to use */
  2331. return NO_VAL;
  2332. } else if (job_gres_ptr->gres_cnt_alloc && node_gres_ptr->topo_cnt) {
  2333. /* Need to determine which specific CPUs can be used */
  2334. gres_avail = node_gres_ptr->gres_cnt_avail;
  2335. if (!use_total_gres)
  2336. gres_avail -= node_gres_ptr->gres_cnt_alloc;
  2337. if (job_gres_ptr->gres_cnt_alloc > gres_avail)
  2338. return (uint32_t) 0; /* insufficient, gres to use */
  2339. if (cpu_bitmap) {
  2340. cpus_ctld = cpu_end_bit - cpu_start_bit + 1;
  2341. if (cpus_ctld < 1) {
  2342. error("gres/%s: job %u cpus on node %s < 1",
  2343. gres_name, job_id, node_name);
  2344. return (uint32_t) 0;
  2345. }
  2346. _validate_gres_node_cpus(node_gres_ptr, cpus_ctld,
  2347. node_name);
  2348. } else {
  2349. cpus_ctld = bit_size(node_gres_ptr->
  2350. topo_cpus_bitmap[0]);
  2351. }
  2352. cpus_avail = xmalloc(sizeof(uint32_t)*node_gres_ptr->topo_cnt);
  2353. for (i=0; i<node_gres_ptr->topo_cnt; i++) {
  2354. if (node_gres_ptr->topo_gres_cnt_avail[i] == 0)
  2355. continue;
  2356. if (!use_total_gres &&
  2357. (node_gres_ptr->topo_gres_cnt_alloc[i] >=
  2358. node_gres_ptr->topo_gres_cnt_avail[i]))
  2359. continue;
  2360. for (j=0; j<cpus_ctld; j++) {
  2361. if (cpu_bitmap &&
  2362. !bit_test(cpu_bitmap, cpu_start_bit+j))
  2363. continue;
  2364. if (bit_test(node_gres_ptr->
  2365. topo_cpus_bitmap[i], j)) {
  2366. cpus_avail[i]++;
  2367. }
  2368. }
  2369. }
  2370. /* Pick the topology entries with the most CPUs available */
  2371. alloc_cpu_bitmap = bit_alloc(cpus_ctld);
  2372. if (alloc_cpu_bitmap == NULL)
  2373. fatal("bit_alloc: malloc failure");
  2374. gres_avail = 0;
  2375. while (gres_avail < job_gres_ptr->gres_cnt_alloc) {
  2376. top_inx = -1;
  2377. for (j=0; j<node_gres_ptr->topo_cnt; j++) {
  2378. if (top_inx == -1) {
  2379. if (cpus_avail[j])
  2380. top_inx = j;
  2381. } else if (cpus_avail[j] > cpus_avail[top_inx])
  2382. top_inx = j;
  2383. }
  2384. if ((top_inx < 0) || (cpus_avail[top_inx] == 0)) {
  2385. cpu_cnt = 0;
  2386. break;
  2387. }
  2388. cpus_avail[top_inx] = 0;
  2389. i = node_gres_ptr->topo_gres_cnt_avail[top_inx];
  2390. if (!use_total_gres)
  2391. i -= node_gres_ptr->topo_gres_cnt_alloc[top_inx];
  2392. if (i < 0) {
  2393. error("gres/%s: topology allocation error on "
  2394. "node %s", gres_name, node_name);
  2395. continue;
  2396. }
  2397. /* update counts of allocated CPUs and GRES */
  2398. gres_avail += i;
  2399. bit_or(alloc_cpu_bitmap,
  2400. node_gres_ptr->topo_cpus_bitmap[top_inx]);
  2401. cpu_cnt = bit_set_count(alloc_cpu_bitmap);
  2402. }
  2403. if (cpu_bitmap && (cpu_cnt > 0)) {
  2404. *topo_set = true;
  2405. for (i=0; i<cpus_ctld; i++) {
  2406. if (!bit_test(alloc_cpu_bitmap, i))
  2407. bit_clear(cpu_bitmap, cpu_start_bit+i);
  2408. }
  2409. }
  2410. FREE_NULL_BITMAP(alloc_cpu_bitmap);
  2411. xfree(cpus_avail);
  2412. return cpu_cnt;
  2413. } else {
  2414. gres_avail = node_gres_ptr->gres_cnt_avail;
  2415. if (!use_total_gres)
  2416. gres_avail -= node_gres_ptr->gres_cnt_alloc;
  2417. if (job_gres_ptr->gres_cnt_alloc > gres_avail)
  2418. return (uint32_t) 0; /* insufficient, gres to use */
  2419. return NO_VAL;
  2420. }
  2421. }
  2422. /*
  2423. * Determine how many CPUs on the node can be used by this job
  2424. * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
  2425. * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
  2426. * IN use_total_gres - if set then consider all gres resources as available,
  2427. * and none are commited to running jobs
  2428. * IN cpu_bitmap - Identification of available CPUs (NULL if no restriction)
  2429. * IN cpu_start_bit - index into cpu_bitmap for this node's first CPU
  2430. * IN cpu_end_bit - index into cpu_bitmap for this node's last CPU
  2431. * IN job_id - job's ID (for logging)
  2432. * IN node_name - name of the node (for logging)
  2433. * RET: NO_VAL - All CPUs on node are available
  2434. * otherwise - Specific CPU count
  2435. */
  2436. extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
  2437. bool use_total_gres, bitstr_t *cpu_bitmap,
  2438. int cpu_start_bit, int cpu_end_bit,
  2439. uint32_t job_id, char *node_name)
  2440. {
  2441. int i;
  2442. uint32_t cpu_cnt, tmp_cnt;
  2443. ListIterator job_gres_iter, node_gres_iter;
  2444. gres_state_t *job_gres_ptr, *node_gres_ptr;
  2445. bool topo_set = false;
  2446. if (job_gres_list == NULL)
  2447. return NO_VAL;
  2448. if (node_gres_list == NULL)
  2449. return NO_VAL;
  2450. cpu_cnt = NO_VAL;
  2451. (void) gres_plugin_init();
  2452. slurm_mutex_lock(&gres_context_lock);
  2453. job_gres_iter = list_iterator_create(job_gres_list);
  2454. while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) {
  2455. node_gres_iter = list_iterator_create(node_gres_list);
  2456. while ((node_gres_ptr = (gres_state_t *)
  2457. list_next(node_gres_iter))) {
  2458. if (job_gres_ptr->plugin_id == node_gres_ptr->plugin_id)
  2459. break;
  2460. }
  2461. list_iterator_destroy(node_gres_iter);
  2462. if (node_gres_ptr == NULL) {
  2463. /* node lack resources required by the job */
  2464. cpu_cnt = 0;
  2465. break;
  2466. }
  2467. for (i=0; i<gres_context_cnt; i++) {
  2468. if (job_gres_ptr->plugin_id !=
  2469. gres_context[i].plugin_id)
  2470. continue;
  2471. tmp_cnt = _job_test(job_gres_ptr->gres_data,
  2472. node_gres_ptr->gres_data,
  2473. use_total_gres, cpu_bitmap,
  2474. cpu_start_bit, cpu_end_bit,
  2475. &topo_set, job_id, node_name,
  2476. gres_context[i].gres_name);
  2477. if (tmp_cnt != NO_VAL) {
  2478. if (cpu_cnt == NO_VAL)
  2479. cpu_cnt = tmp_cnt;
  2480. else
  2481. cpu_cnt = MIN(tmp_cnt, cpu_cnt);
  2482. }
  2483. break;
  2484. }
  2485. if (cpu_cnt == 0)
  2486. break;
  2487. }
  2488. list_iterator_destroy(job_gres_iter);
  2489. slurm_mutex_unlock(&gres_context_lock);
  2490. return cpu_cnt;
  2491. }
  2492. extern int _job_alloc(void *job_gres_data, void *node_gres_data,
  2493. int node_cnt, int node_offset, uint32_t cpu_cnt,
  2494. char *gres_name, uint32_t job_id, char *node_name,
  2495. bitstr_t *core_bitmap)
  2496. {
  2497. int i;
  2498. uint32_t gres_cnt;
  2499. gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data;
  2500. gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data;
  2501. /*
  2502. * Validate data structures. Either job_gres_data->node_cnt and
  2503. * job_gres_data->gres_bit_alloc are both set or both zero/NULL.
  2504. */
  2505. xassert(node_cnt);
  2506. xassert(node_offset >= 0);
  2507. xassert(job_gres_ptr);
  2508. xassert(node_gres_ptr);
  2509. if (job_gres_ptr->node_cnt == 0) {
  2510. job_gres_ptr->node_cnt = node_cnt;
  2511. if (job_gres_ptr->gres_bit_alloc) {
  2512. error("gres/%s: job %u node_cnt==0 and bit_alloc is "
  2513. "set", gres_name, job_id);
  2514. xfree(job_gres_ptr->gres_bit_alloc);
  2515. }
  2516. job_gres_ptr->gres_bit_alloc = xmalloc(sizeof(bitstr_t *) *
  2517. node_cnt);
  2518. } else if (job_gres_ptr->node_cnt < node_cnt) {
  2519. error("gres/%s: job %u node_cnt increase from %u to %d",
  2520. gres_name, job_id, job_gres_ptr->node_cnt, node_cnt);
  2521. if (node_offset >= job_gres_ptr->node_cnt)
  2522. return SLURM_ERROR;
  2523. } else if (job_gres_ptr->node_cnt > node_cnt) {
  2524. error("gres/%s: job %u node_cnt decrease from %u to %d",
  2525. gres_name, job_id, job_gres_ptr->node_cnt, node_cnt);
  2526. }
  2527. /*
  2528. * Check that sufficient resources exist on this node
  2529. */
  2530. gres_cnt = job_gres_ptr->gres_cnt_alloc;
  2531. i = node_gres_ptr->gres_cnt_alloc + gres_cnt;
  2532. i -= node_gres_ptr->gres_cnt_avail;
  2533. if (i > 0) {
  2534. error("gres/%s: job %u node %s overallocated resources by %d",
  2535. gres_name, job_id, node_name, i);
  2536. /* proceed with request, give job what's available */
  2537. }
  2538. if (job_gres_ptr->gres_cnt_step_alloc == NULL) {
  2539. job_gres_ptr->gres_cnt_step_alloc =
  2540. xmalloc(sizeof(uint32_t) * node_cnt);
  2541. }
  2542. /*
  2543. * Select the specific resources to use for this job.
  2544. */
  2545. if (job_gres_ptr->gres_bit_alloc[node_offset]) {
  2546. /* Resuming a suspended job, resources already allocated */
  2547. if (node_gres_ptr->gres_bit_alloc == NULL) {
  2548. node_gres_ptr->gres_bit_alloc =
  2549. bit_copy(job_gres_ptr->
  2550. gres_bit_alloc[node_offset]);
  2551. if (node_gres_ptr->gres_bit_alloc == NULL)
  2552. fatal("bit_copy: malloc failure");
  2553. node_gres_ptr->gres_cnt_alloc +=
  2554. bit_set_count(node_gres_ptr->gres_bit_alloc);
  2555. } else if (node_gres_ptr->gres_bit_alloc) {
  2556. gres_cnt = MIN(bit_size(node_gres_ptr->gres_bit_alloc),
  2557. bit_size(job_gres_ptr->
  2558. gres_bit_alloc[node_offset]));
  2559. for (i=0; i<gres_cnt; i++) {
  2560. if (bit_test(job_gres_ptr->
  2561. gres_bit_alloc[node_offset], i) &&
  2562. !bit_test(node_gres_ptr->gres_bit_alloc,i)){
  2563. bit_set(node_gres_ptr->gres_bit_alloc,i);
  2564. node_gres_ptr->gres_cnt_alloc++;
  2565. }
  2566. }
  2567. }
  2568. } else if (node_gres_ptr->gres_bit_alloc) {
  2569. job_gres_ptr->gres_bit_alloc[node_offset] =
  2570. bit_alloc(node_gres_ptr->gres_cnt_avail);
  2571. if (job_gres_ptr->gres_bit_alloc[node_offset] == NULL)
  2572. fatal("bit_copy: malloc failure");
  2573. for (i=0; i<node_gres_ptr->gres_cnt_avail && gres_cnt>0; i++) {
  2574. if (bit_test(node_gres_ptr->gres_bit_alloc, i))
  2575. continue;
  2576. bit_set(node_gres_ptr->gres_bit_alloc, i);
  2577. bit_set(job_gres_ptr->gres_bit_alloc[node_offset], i);
  2578. node_gres_ptr->gres_cnt_alloc++;
  2579. gres_cnt--;
  2580. }
  2581. } else {
  2582. node_gres_ptr->gres_cnt_alloc += job_gres_ptr->gres_cnt_alloc;
  2583. }
  2584. if (job_gres_ptr->gres_bit_alloc &&
  2585. job_gres_ptr->gres_bit_alloc[node_offset] &&
  2586. node_gres_ptr->topo_gres_bitmap &&
  2587. node_gres_ptr->topo_gres_cnt_alloc) {
  2588. for (i = 0; i < node_gres_ptr->topo_cnt; i++) {
  2589. /* Insure that if specific CPUs are associated with
  2590. * specific GRES and the CPU count matches the
  2591. * slurmctld configuration that we only use the GRES
  2592. * on the CPUs that have already been allocated. */
  2593. if (core_bitmap &&
  2594. (bit_size(core_bitmap) ==
  2595. bit_size(node_gres_ptr->topo_cpus_bitmap[i])) &&
  2596. !bit_overlap(core_bitmap,
  2597. node_gres_ptr->topo_cpus_bitmap[i]))
  2598. continue;
  2599. gres_cnt = bit_overlap(job_gres_ptr->
  2600. gres_bit_alloc[node_offset],
  2601. node_gres_ptr->
  2602. topo_gres_bitmap[i]);
  2603. node_gres_ptr->topo_gres_cnt_alloc[i] += gres_cnt;
  2604. }
  2605. }
  2606. return SLURM_SUCCESS;
  2607. }
  2608. /*
  2609. * Allocate resource to a job and update node and job gres information
  2610. * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
  2611. * IN node_gres_list - node's gres_list built by
  2612. * gres_plugin_node_config_validate()
  2613. * IN node_cnt - total number of nodes originally allocated to the job
  2614. * IN node_offset - zero-origin index to the node of interest
  2615. * IN cpu_cnt - number of CPUs allocated to this job on this node
  2616. * IN job_id - job's ID (for logging)
  2617. * IN node_name - name of the node (for logging)
  2618. * IN core_bitmap - cores allocated to this job on this node (NULL if not
  2619. * available)
  2620. * RET SLURM_SUCCESS or error code
  2621. */
  2622. extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list,
  2623. int node_cnt, int node_offset,
  2624. uint32_t cpu_cnt, uint32_t job_id,
  2625. char *node_name, bitstr_t *core_bitmap)
  2626. {
  2627. int i, rc, rc2;
  2628. ListIterator job_gres_iter, node_gres_iter;
  2629. gres_state_t *job_gres_ptr, *node_gres_ptr;
  2630. if (job_gres_list == NULL)
  2631. return SLURM_SUCCESS;
  2632. if (node_gres_list == NULL) {
  2633. error("gres_job_alloc: job %u has gres specification while "
  2634. "node %s has none", job_id, node_name);
  2635. return SLURM_ERROR;
  2636. }
  2637. rc = gres_plugin_init();
  2638. slurm_mutex_lock(&gres_context_lock);
  2639. job_gres_iter = list_iterator_create(job_gres_list);
  2640. while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) {
  2641. for (i=0; i<gres_context_cnt; i++) {
  2642. if (job_gres_ptr->plugin_id ==
  2643. gres_context[i].plugin_id)
  2644. break;
  2645. }
  2646. if (i >= gres_context_cnt) {
  2647. error("gres_plugin_job_alloc: no plugin configured "
  2648. "for data type %u for job %u and node %s",
  2649. job_gres_ptr->plugin_id, job_id, node_name);
  2650. /* A likely sign that GresPlugins has changed */
  2651. continue;
  2652. }
  2653. node_gres_iter = list_iterator_create(node_gres_list);
  2654. while ((node_gres_ptr = (gres_state_t *)
  2655. list_next(node_gres_iter))) {
  2656. if (job_gres_ptr->plugin_id == node_gres_ptr->plugin_id)
  2657. break;
  2658. }
  2659. list_iterator_destroy(node_gres_iter);
  2660. if (node_gres_ptr == NULL) {
  2661. error("gres_plugin_job_alloc: job %u allocated gres/%s "
  2662. "on node %s lacking that gres",
  2663. job_id, gres_context[i].gres_name, node_name);
  2664. continue;
  2665. }
  2666. rc2 = _job_alloc(job_gres_ptr->gres_data,
  2667. node_gres_ptr->gres_data, node_cnt,
  2668. node_offset, cpu_cnt,
  2669. gres_context[i].gres_name, job_id, node_name,
  2670. core_bitmap);
  2671. if (rc2 != SLURM_SUCCESS)
  2672. rc = rc2;
  2673. }
  2674. list_iterator_destroy(job_gres_iter);
  2675. slurm_mutex_unlock(&gres_context_lock);
  2676. return rc;
  2677. }
  2678. static int _job_dealloc(void *job_gres_data, void *node_gres_data,
  2679. int node_offset, char *gres_name, uint32_t job_id,
  2680. char *node_name)
  2681. {
  2682. int i, len, gres_cnt;
  2683. gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data;
  2684. gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data;
  2685. /*
  2686. * Validate data structures. Either job_gres_data->node_cnt and
  2687. * job_gres_data->gres_bit_alloc are both set or both zero/NULL.
  2688. */
  2689. xassert(node_offset >= 0);
  2690. xassert(job_gres_ptr);
  2691. xassert(node_gres_ptr);
  2692. if (job_gres_ptr->node_cnt <= node_offset) {
  2693. error("gres/%s: job %u dealloc of node %s bad node_offset %d "
  2694. "count is %u", gres_name, job_id, node_name, node_offset,
  2695. job_gres_ptr->node_cnt);
  2696. return SLURM_ERROR;
  2697. }
  2698. if (node_gres_ptr->gres_bit_alloc && job_gres_ptr->gres_bit_alloc &&
  2699. job_gres_ptr->gres_bit_alloc[node_offset]) {
  2700. len = bit_size(job_gres_ptr->gres_bit_alloc[node_offset]);
  2701. i = bit_size(node_gres_ptr->gres_bit_alloc);
  2702. if (i != len) {
  2703. error("gres/%s: job %u and node %s bitmap sizes differ "
  2704. "(%d != %d)", gres_name, job_id, node_name, len,
  2705. i);
  2706. len = MIN(len, i);
  2707. /* proceed with request, make best effort */
  2708. }
  2709. for (i=0; i<len; i++) {
  2710. if (!bit_test(job_gres_ptr->gres_bit_alloc[node_offset],
  2711. i)) {
  2712. continue;
  2713. }
  2714. bit_clear(node_gres_ptr->gres_bit_alloc, i);
  2715. /* NOTE: Do not clear bit from
  2716. * job_gres_ptr->gres_bit_alloc[node_offset]
  2717. * since this may only be an emulated deallocate */
  2718. if (node_gres_ptr->gres_cnt_alloc)
  2719. node_gres_ptr->gres_cnt_alloc--;
  2720. else {
  2721. error("gres/%s: job %u dealloc node %s gres "
  2722. "count underflow", gres_name, job_id,
  2723. node_name);
  2724. }
  2725. }
  2726. } else if (node_gres_ptr->gres_cnt_alloc >=
  2727. job_gres_ptr->gres_cnt_alloc) {
  2728. node_gres_ptr->gres_cnt_alloc -= job_gres_ptr->gres_cnt_alloc;
  2729. } else {
  2730. node_gres_ptr->gres_cnt_alloc = 0;
  2731. error("gres/%s: job %u node %s gres count underflow",
  2732. gres_name, job_id, node_name);
  2733. }
  2734. if (job_gres_ptr->gres_bit_alloc &&
  2735. job_gres_ptr->gres_bit_alloc[node_offset] &&
  2736. node_gres_ptr->topo_gres_bitmap &&
  2737. node_gres_ptr->topo_gres_cnt_alloc) {
  2738. for (i=0; i<node_gres_ptr->topo_cnt; i++) {
  2739. gres_cnt = bit_overlap(job_gres_ptr->
  2740. gres_bit_alloc[node_offset],
  2741. node_gres_ptr->
  2742. topo_gres_bitmap[i]);
  2743. node_gres_ptr->topo_gres_cnt_alloc[i] -= gres_cnt;
  2744. }
  2745. }
  2746. return SLURM_SUCCESS;
  2747. }
  2748. /*
  2749. * Deallocate resource from a job and update node and job gres information
  2750. * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
  2751. * IN node_gres_list - node's gres_list built by
  2752. * gres_plugin_node_config_validate()
  2753. * IN node_offset - zero-origin index to the node of interest
  2754. * IN job_id - job's ID (for logging)
  2755. * IN node_name - name of the node (for logging)
  2756. * RET SLURM_SUCCESS or error code
  2757. */
  2758. extern int gres_plugin_job_dealloc(List job_gres_list, List node_gres_list,
  2759. int node_offset, uint32_t job_id,
  2760. char *node_name)
  2761. {
  2762. int i, rc, rc2;
  2763. ListIterator job_gres_iter, node_gres_iter;
  2764. gres_state_t *job_gres_ptr, *node_gres_ptr;
  2765. char *gres_name = NULL;
  2766. if (job_gres_list == NULL)
  2767. return SLURM_SUCCESS;
  2768. if (node_gres_list == NULL) {
  2769. error("gres_job_dealloc: job %u has gres specification while "
  2770. "node %s has none", job_id, node_name);
  2771. return SLURM_ERROR;
  2772. }
  2773. rc = gres_plugin_init();
  2774. slurm_mutex_lock(&gres_context_lock);
  2775. job_gres_iter = list_iterator_create(job_gres_list);
  2776. while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) {
  2777. for (i=0; i<gres_context_cnt; i++) {
  2778. if (job_gres_ptr->plugin_id ==
  2779. gres_context[i].plugin_id)
  2780. break;
  2781. }
  2782. if (i >= gres_context_cnt) {
  2783. error("gres_plugin_job_dealloc: no plugin configured "
  2784. "for data type %u for job %u and node %s",
  2785. job_gres_ptr->plugin_id, job_id, node_name);
  2786. /* A likely sign that GresPlugins has changed */
  2787. gres_name = "UNKNOWN";
  2788. } else
  2789. gres_name = gres_context[i].gres_name;
  2790. node_gres_iter = list_iterator_create(node_gres_list);
  2791. while ((node_gres_ptr = (gres_state_t *)
  2792. list_next(node_gres_iter))) {
  2793. if (job_gres_ptr->plugin_id == node_gres_ptr->plugin_id)
  2794. break;
  2795. }
  2796. list_iterator_destroy(node_gres_iter);
  2797. if (node_gres_ptr == NULL) {
  2798. error("gres_plugin_job_dealloc: node %s lacks gres/%s "
  2799. "for job %u", node_name, gres_name , job_id);
  2800. continue;
  2801. }
  2802. rc2 = _job_dealloc(job_gres_ptr->gres_data,
  2803. node_gres_ptr->gres_data, node_offset,
  2804. gres_name, job_id, node_name);
  2805. if (rc2 != SLURM_SUCCESS)
  2806. rc = rc2;
  2807. }
  2808. list_iterator_destroy(job_gres_iter);
  2809. slurm_mutex_unlock(&gres_context_lock);
  2810. return rc;
  2811. }
  2812. /*
  2813. * Merge one job's gres allocation into another job's gres allocation.
  2814. * IN from_job_gres_list - List of gres records for the job being merged
  2815. * into another job
  2816. * IN from_job_node_bitmap - bitmap of nodes for the job being merged into
  2817. * another job
  2818. * IN/OUT to_job_gres_list - List of gres records for the job being merged
  2819. * into job
  2820. * IN to_job_node_bitmap - bitmap of nodes for the job being merged into
  2821. */
  2822. extern void gres_plugin_job_merge(List from_job_gres_list,
  2823. bitstr_t *from_job_node_bitmap,
  2824. List to_job_gres_list,
  2825. bitstr_t *to_job_node_bitmap)
  2826. {
  2827. ListIterator gres_iter;
  2828. gres_state_t *gres_ptr, *gres_ptr2;
  2829. gres_job_state_t *gres_job_ptr, *gres_job_ptr2;
  2830. int new_node_cnt;
  2831. int i_first, i_last, i;
  2832. int from_inx, to_inx, new_inx;
  2833. bitstr_t **new_gres_bit_alloc, **new_gres_bit_step_alloc;
  2834. uint32_t *new_gres_cnt_step_alloc;
  2835. (void) gres_plugin_init();
  2836. new_node_cnt = bit_set_count(from_job_node_bitmap) +
  2837. bit_set_count(to_job_node_bitmap) -
  2838. bit_overlap(from_job_node_bitmap, to_job_node_bitmap);
  2839. i_first = MIN(bit_ffs(from_job_node_bitmap),
  2840. bit_ffs(to_job_node_bitmap));
  2841. i_first = MAX(i_first, 0);
  2842. i_last = MAX(bit_fls(from_job_node_bitmap),
  2843. bit_fls(to_job_node_bitmap));
  2844. if (i_last == -1) {
  2845. error("gres_plugin_job_merge: node_bitmaps are empty");
  2846. return;
  2847. }
  2848. slurm_mutex_lock(&gres_context_lock);
  2849. /* Step one - Expand the gres data structures in "to" job */
  2850. if (!to_job_gres_list)
  2851. goto step2;
  2852. gres_iter = list_iterator_create(to_job_gres_list);
  2853. if (!gres_iter)
  2854. fatal("list_iterator_create: malloc failure");
  2855. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  2856. gres_job_ptr = (gres_job_state_t *) gres_ptr->gres_data;
  2857. new_gres_bit_alloc = xmalloc(sizeof(bitstr_t *) *
  2858. new_node_cnt);
  2859. new_gres_bit_step_alloc = xmalloc(sizeof(bitstr_t *) *
  2860. new_node_cnt);
  2861. new_gres_cnt_step_alloc = xmalloc(sizeof(uint32_t) *
  2862. new_node_cnt);
  2863. if (!new_gres_bit_alloc || !new_gres_bit_step_alloc ||
  2864. !new_gres_cnt_step_alloc)
  2865. fatal("malloc failure");
  2866. from_inx = to_inx = new_inx = -1;
  2867. for (i = i_first; i <= i_last; i++) {
  2868. bool from_match = false, to_match = false;
  2869. if (bit_test(to_job_node_bitmap, i)) {
  2870. to_match = true;
  2871. to_inx++;
  2872. }
  2873. if (bit_test(from_job_node_bitmap, i)) {
  2874. from_match = true;
  2875. from_inx++;
  2876. }
  2877. if (from_match || to_match)
  2878. new_inx++;
  2879. if (to_match) {
  2880. if (gres_job_ptr->gres_bit_alloc) {
  2881. new_gres_bit_alloc[new_inx] =
  2882. gres_job_ptr->
  2883. gres_bit_alloc[to_inx];
  2884. }
  2885. if (gres_job_ptr->gres_bit_step_alloc) {
  2886. new_gres_bit_step_alloc[new_inx] =
  2887. gres_job_ptr->
  2888. gres_bit_step_alloc[to_inx];
  2889. }
  2890. if (gres_job_ptr->gres_cnt_step_alloc) {
  2891. new_gres_cnt_step_alloc[new_inx] =
  2892. gres_job_ptr->
  2893. gres_cnt_step_alloc[to_inx];
  2894. }
  2895. }
  2896. }
  2897. gres_job_ptr->node_cnt = new_node_cnt;
  2898. xfree(gres_job_ptr->gres_bit_alloc);
  2899. gres_job_ptr->gres_bit_alloc = new_gres_bit_alloc;
  2900. xfree(gres_job_ptr->gres_bit_step_alloc);
  2901. gres_job_ptr->gres_bit_step_alloc = new_gres_bit_step_alloc;
  2902. xfree(gres_job_ptr->gres_cnt_step_alloc);
  2903. gres_job_ptr->gres_cnt_step_alloc = new_gres_cnt_step_alloc;
  2904. }
  2905. list_iterator_destroy(gres_iter);
  2906. /* Step two - Merge the gres information from the "from" job into the
  2907. * existing gres information for the "to" job */
  2908. step2: if (!from_job_gres_list)
  2909. goto step3;
  2910. if (!to_job_gres_list) {
  2911. to_job_gres_list = list_create(_gres_job_list_delete);
  2912. if (!to_job_gres_list)
  2913. fatal("list_create: malloc failure");
  2914. }
  2915. gres_iter = list_iterator_create(from_job_gres_list);
  2916. if (!gres_iter)
  2917. fatal("list_iterator_create: malloc failure");
  2918. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  2919. gres_job_ptr = (gres_job_state_t *) gres_ptr->gres_data;
  2920. gres_ptr2 = list_find_first(to_job_gres_list, _gres_find_id,
  2921. &gres_ptr->plugin_id);
  2922. if (gres_ptr2) {
  2923. gres_job_ptr2 = gres_ptr2->gres_data;
  2924. } else {
  2925. gres_ptr2 = xmalloc(sizeof(gres_state_t));
  2926. gres_job_ptr2 = xmalloc(sizeof(gres_job_state_t));
  2927. gres_ptr2->plugin_id = gres_ptr->plugin_id;
  2928. gres_ptr2->gres_data = gres_job_ptr2;
  2929. gres_job_ptr2->gres_cnt_alloc = gres_job_ptr->
  2930. gres_cnt_alloc;
  2931. gres_job_ptr2->node_cnt = new_node_cnt;
  2932. gres_job_ptr2->gres_bit_alloc =
  2933. xmalloc(sizeof(bitstr_t *) * new_node_cnt);
  2934. gres_job_ptr2->gres_bit_step_alloc =
  2935. xmalloc(sizeof(bitstr_t *) * new_node_cnt);
  2936. gres_job_ptr2->gres_cnt_step_alloc =
  2937. xmalloc(sizeof(uint32_t) * new_node_cnt);
  2938. list_append(to_job_gres_list, gres_ptr2);
  2939. }
  2940. from_inx = to_inx = new_inx = -1;
  2941. for (i = i_first; i <= i_last; i++) {
  2942. bool from_match = false, to_match = false;
  2943. if (bit_test(to_job_node_bitmap, i)) {
  2944. to_match = true;
  2945. to_inx++;
  2946. }
  2947. if (bit_test(from_job_node_bitmap, i)) {
  2948. from_match = true;
  2949. from_inx++;
  2950. }
  2951. if (from_match || to_match)
  2952. new_inx++;
  2953. if (from_match) {
  2954. if (!gres_job_ptr->gres_bit_alloc) {
  2955. ;
  2956. } else if (gres_job_ptr2->
  2957. gres_bit_alloc[new_inx]) {
  2958. /* Do not merge GRES allocations on
  2959. * a node, just keep original job's */
  2960. #if 0
  2961. bit_or(gres_job_ptr2->
  2962. gres_bit_alloc[new_inx],
  2963. gres_job_ptr->
  2964. gres_bit_alloc[from_inx]);
  2965. #endif
  2966. } else {
  2967. gres_job_ptr2->gres_bit_alloc[new_inx] =
  2968. gres_job_ptr->
  2969. gres_bit_alloc[from_inx];
  2970. gres_job_ptr->
  2971. gres_bit_alloc
  2972. [from_inx] = NULL;
  2973. }
  2974. if (gres_job_ptr->gres_cnt_step_alloc &&
  2975. gres_job_ptr->
  2976. gres_cnt_step_alloc[from_inx]) {
  2977. error("Attempt to merge gres, from "
  2978. "job has active steps");
  2979. }
  2980. }
  2981. }
  2982. }
  2983. list_iterator_destroy(gres_iter);
  2984. step3: slurm_mutex_unlock(&gres_context_lock);
  2985. return;
  2986. }
  2987. /*
  2988. * Set environment variables as required for a batch job
  2989. * IN/OUT job_env_ptr - environment variable array
  2990. * IN gres_list - generated by gres_plugin_job_alloc()
  2991. */
  2992. extern void gres_plugin_job_set_env(char ***job_env_ptr, List job_gres_list)
  2993. {
  2994. int i;
  2995. ListIterator gres_iter;
  2996. gres_state_t *gres_ptr = NULL;
  2997. (void) gres_plugin_init();
  2998. slurm_mutex_lock(&gres_context_lock);
  2999. for (i=0; i<gres_context_cnt; i++) {
  3000. if (gres_context[i].ops.job_set_env == NULL)
  3001. continue; /* No plugin to call */
  3002. if (job_gres_list) {
  3003. gres_iter = list_iterator_create(job_gres_list);
  3004. while ((gres_ptr = (gres_state_t *)
  3005. list_next(gres_iter))) {
  3006. if (gres_ptr->plugin_id !=
  3007. gres_context[i].plugin_id)
  3008. continue;
  3009. (*(gres_context[i].ops.job_set_env))
  3010. (job_env_ptr, gres_ptr->gres_data);
  3011. break;
  3012. }
  3013. list_iterator_destroy(gres_iter);
  3014. }
  3015. if (gres_ptr == NULL) { /* No data found */
  3016. (*(gres_context[i].ops.job_set_env))
  3017. (job_env_ptr, NULL);
  3018. }
  3019. }
  3020. slurm_mutex_unlock(&gres_context_lock);
  3021. }
  3022. static void _job_state_log(void *gres_data, uint32_t job_id, char *gres_name)
  3023. {
  3024. gres_job_state_t *gres_ptr;
  3025. char tmp_str[128];
  3026. int i;
  3027. xassert(gres_data);
  3028. gres_ptr = (gres_job_state_t *) gres_data;
  3029. info("gres: %s state for job %u", gres_name, job_id);
  3030. info(" gres_cnt:%u node_cnt:%u", gres_ptr->gres_cnt_alloc,
  3031. gres_ptr->node_cnt);
  3032. if (gres_ptr->node_cnt == 0)
  3033. return;
  3034. if (gres_ptr->gres_bit_alloc == NULL)
  3035. info(" gres_bit_alloc:NULL");
  3036. if (gres_ptr->gres_bit_step_alloc == NULL)
  3037. info(" gres_bit_step_alloc:NULL");
  3038. if (gres_ptr->gres_cnt_step_alloc == NULL)
  3039. info(" gres_cnt_step_alloc:NULL");
  3040. for (i=0; i<gres_ptr->node_cnt; i++) {
  3041. if (gres_ptr->gres_bit_alloc && gres_ptr->gres_bit_alloc[i]) {
  3042. bit_fmt(tmp_str, sizeof(tmp_str),
  3043. gres_ptr->gres_bit_alloc[i]);
  3044. info(" gres_bit_alloc[%d]:%s", i, tmp_str);
  3045. } else if (gres_ptr->gres_bit_alloc)
  3046. info(" gres_bit_alloc[%d]:NULL", i);
  3047. if (gres_ptr->gres_bit_step_alloc &&
  3048. gres_ptr->gres_bit_step_alloc[i]) {
  3049. bit_fmt(tmp_str, sizeof(tmp_str),
  3050. gres_ptr->gres_bit_step_alloc[i]);
  3051. info(" gres_bit_step_alloc[%d]:%s", i, tmp_str);
  3052. } else if (gres_ptr->gres_bit_step_alloc)
  3053. info(" gres_bit_step_alloc[%d]:NULL", i);
  3054. if (gres_ptr->gres_cnt_step_alloc) {
  3055. info(" gres_cnt_step_alloc[%d]:%u", i,
  3056. gres_ptr->gres_cnt_step_alloc[i]);
  3057. }
  3058. }
  3059. }
  3060. /*
  3061. * Extract from the job record's gres_list the count of allocated resources of
  3062. * the named gres gres typee.
  3063. * IN job_gres_list - job record's gres_list.
  3064. * IN gres_name_type - the name of the gres type to retrieve the associated
  3065. * value from.
  3066. * RET The value associated with the gres type or NO_VAL if not found.
  3067. */
  3068. extern uint32_t gres_plugin_get_job_value_by_type(List job_gres_list,
  3069. char *gres_name_type)
  3070. {
  3071. uint32_t gres_val, gres_name_type_id;
  3072. ListIterator job_gres_iter;
  3073. gres_state_t *job_gres_ptr;
  3074. if (job_gres_list == NULL)
  3075. return NO_VAL;
  3076. gres_name_type_id = _build_id(gres_name_type);
  3077. gres_val = NO_VAL;
  3078. job_gres_iter = list_iterator_create(job_gres_list);
  3079. while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) {
  3080. if (job_gres_ptr->plugin_id == gres_name_type_id) {
  3081. gres_val = ((gres_job_state_t*)
  3082. (job_gres_ptr->gres_data))->gres_cnt_alloc;
  3083. break;
  3084. }
  3085. }
  3086. list_iterator_destroy(job_gres_iter);
  3087. return gres_val;
  3088. }
  3089. /*
  3090. * Log a job's current gres state
  3091. * IN gres_list - generated by gres_plugin_job_state_validate()
  3092. * IN job_id - job's ID
  3093. */
  3094. extern void gres_plugin_job_state_log(List gres_list, uint32_t job_id)
  3095. {
  3096. int i;
  3097. ListIterator gres_iter;
  3098. gres_state_t *gres_ptr;
  3099. if (!gres_debug || (gres_list == NULL))
  3100. return;
  3101. (void) gres_plugin_init();
  3102. slurm_mutex_lock(&gres_context_lock);
  3103. gres_iter = list_iterator_create(gres_list);
  3104. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  3105. for (i=0; i<gres_context_cnt; i++) {
  3106. if (gres_ptr->plugin_id !=
  3107. gres_context[i].plugin_id)
  3108. continue;
  3109. _job_state_log(gres_ptr->gres_data, job_id,
  3110. gres_context[i].gres_name);
  3111. break;
  3112. }
  3113. }
  3114. list_iterator_destroy(gres_iter);
  3115. slurm_mutex_unlock(&gres_context_lock);
  3116. }
  3117. extern void gres_plugin_job_state_file(List gres_list, int *gres_bit_alloc,
  3118. int *gres_count)
  3119. {
  3120. int i, j, gres_cnt=0, len, p, found=0;
  3121. ListIterator gres_iter;
  3122. gres_state_t *gres_ptr;
  3123. gres_job_state_t *gres_job_ptr;
  3124. if (gres_list == NULL)
  3125. return;
  3126. (void) gres_plugin_init();
  3127. slurm_mutex_lock(&gres_context_lock);
  3128. gres_iter = list_iterator_create(gres_list);
  3129. if (!gres_iter)
  3130. fatal("list_iterator_create: malloc failure");
  3131. for (j=0; j<gres_context_cnt; j++) {
  3132. found = 0;
  3133. list_iterator_reset(gres_iter);
  3134. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))){
  3135. if (gres_ptr->plugin_id !=
  3136. gres_context[j].plugin_id ) {
  3137. continue;
  3138. }
  3139. found = 1;
  3140. gres_job_ptr = (gres_job_state_t *) gres_ptr->gres_data;
  3141. if ((gres_job_ptr != NULL) &&
  3142. (gres_job_ptr->node_cnt == 1) &&
  3143. (gres_job_ptr->gres_bit_alloc != NULL) &&
  3144. (gres_job_ptr->gres_bit_alloc[0] != NULL)) {
  3145. len = bit_size(gres_job_ptr->gres_bit_alloc[0]);
  3146. for (i=0; i<len; i++) {
  3147. if (!bit_test(gres_job_ptr->
  3148. gres_bit_alloc[0], i))
  3149. gres_bit_alloc[gres_cnt] = 0;
  3150. else
  3151. gres_bit_alloc[gres_cnt] = 1;
  3152. gres_cnt++;
  3153. }
  3154. }
  3155. break;
  3156. }
  3157. if (found == 0) {
  3158. for (p=0; p<gres_count[j]; p++){
  3159. gres_bit_alloc[gres_cnt] = 0;
  3160. gres_cnt++;
  3161. }
  3162. }
  3163. }
  3164. list_iterator_destroy(gres_iter);
  3165. slurm_mutex_unlock(&gres_context_lock);
  3166. }
  3167. static void _step_state_delete(void *gres_data)
  3168. {
  3169. int i;
  3170. gres_step_state_t *gres_ptr = (gres_step_state_t *) gres_data;
  3171. if (gres_ptr == NULL)
  3172. return;
  3173. FREE_NULL_BITMAP(gres_ptr->node_in_use);
  3174. if (gres_ptr->gres_bit_alloc) {
  3175. for (i=0; i<gres_ptr->node_cnt; i++)
  3176. FREE_NULL_BITMAP(gres_ptr->gres_bit_alloc[i]);
  3177. xfree(gres_ptr->gres_bit_alloc);
  3178. }
  3179. xfree(gres_ptr);
  3180. }
  3181. static void _gres_step_list_delete(void *list_element)
  3182. {
  3183. gres_state_t *gres_ptr = (gres_state_t *) list_element;
  3184. _step_state_delete(gres_ptr->gres_data);
  3185. xfree(gres_ptr);
  3186. }
  3187. static int _step_state_validate(char *config, void **gres_data,
  3188. slurm_gres_context_t *context_ptr)
  3189. {
  3190. int rc;
  3191. uint32_t gres_cnt;
  3192. rc = _job_config_validate(config, &gres_cnt, context_ptr);
  3193. if ((rc == SLURM_SUCCESS) && (gres_cnt > 0)) {
  3194. gres_step_state_t *gres_ptr;
  3195. gres_ptr = xmalloc(sizeof(gres_step_state_t));
  3196. gres_ptr->gres_cnt_alloc = gres_cnt;
  3197. *gres_data = gres_ptr;
  3198. } else
  3199. *gres_data = NULL;
  3200. return rc;
  3201. }
  3202. static uint32_t _step_test(void *step_gres_data, void *job_gres_data,
  3203. int node_offset, bool ignore_alloc, char *gres_name,
  3204. uint32_t job_id, uint32_t step_id)
  3205. {
  3206. gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data;
  3207. gres_step_state_t *step_gres_ptr = (gres_step_state_t *) step_gres_data;
  3208. uint32_t gres_cnt;
  3209. xassert(job_gres_ptr);
  3210. xassert(step_gres_ptr);
  3211. if (node_offset == NO_VAL) {
  3212. if (step_gres_ptr->gres_cnt_alloc >
  3213. job_gres_ptr->gres_cnt_alloc)
  3214. return 0;
  3215. return NO_VAL;
  3216. }
  3217. if (node_offset >= job_gres_ptr->node_cnt) {
  3218. error("gres/%s: step_test %u.%u node offset invalid (%d >= %u)",
  3219. gres_name, job_id, step_id, node_offset,
  3220. job_gres_ptr->node_cnt);
  3221. return 0;
  3222. }
  3223. if (job_gres_ptr->gres_cnt_step_alloc) {
  3224. if (step_gres_ptr->gres_cnt_alloc >
  3225. (job_gres_ptr->gres_cnt_alloc -
  3226. job_gres_ptr->gres_cnt_step_alloc[node_offset]))
  3227. return 0;
  3228. } else {
  3229. error("gres/%s: step_test %u.%u gres_bit_alloc is NULL",
  3230. gres_name, job_id, step_id);
  3231. return 0;
  3232. }
  3233. if (job_gres_ptr->gres_bit_alloc &&
  3234. job_gres_ptr->gres_bit_alloc[node_offset]) {
  3235. gres_cnt = bit_set_count(job_gres_ptr->
  3236. gres_bit_alloc[node_offset]);
  3237. if (!ignore_alloc &&
  3238. job_gres_ptr->gres_bit_step_alloc &&
  3239. job_gres_ptr->gres_bit_step_alloc[node_offset]) {
  3240. gres_cnt -= bit_set_count(job_gres_ptr->
  3241. gres_bit_step_alloc
  3242. [node_offset]);
  3243. }
  3244. if (step_gres_ptr->gres_cnt_alloc > gres_cnt)
  3245. gres_cnt = 0;
  3246. else
  3247. gres_cnt = NO_VAL;
  3248. } else if (job_gres_ptr->gres_cnt_step_alloc &&
  3249. job_gres_ptr->gres_cnt_step_alloc[node_offset]) {
  3250. gres_cnt = job_gres_ptr->gres_cnt_alloc -
  3251. job_gres_ptr->gres_cnt_step_alloc[node_offset];
  3252. if (step_gres_ptr->gres_cnt_alloc > gres_cnt)
  3253. gres_cnt = 0;
  3254. else
  3255. gres_cnt = NO_VAL;
  3256. } else {
  3257. /* Note: We already validated the gres count above */
  3258. debug("gres/%s: step_test %u.%u gres_bit_alloc is NULL",
  3259. gres_name, job_id, step_id);
  3260. gres_cnt = NO_VAL;
  3261. }
  3262. return gres_cnt;
  3263. }
  3264. /*
  3265. * Given a step's requested gres configuration, validate it and build gres list
  3266. * IN req_config - step request's gres input string
  3267. * OUT step_gres_list - List of Gres records for this step to track usage
  3268. * IN job_gres_list - List of Gres records for this job
  3269. * IN job_id, step_id - ID of the step being allocated.
  3270. * RET SLURM_SUCCESS or ESLURM_INVALID_GRES
  3271. */
  3272. extern int gres_plugin_step_state_validate(char *req_config,
  3273. List *step_gres_list,
  3274. List job_gres_list, uint32_t job_id,
  3275. uint32_t step_id)
  3276. {
  3277. char *tmp_str, *tok, *last = NULL;
  3278. int i, rc, rc2, rc3;
  3279. gres_state_t *step_gres_ptr, *job_gres_ptr;
  3280. void *step_gres_data, *job_gres_data;
  3281. ListIterator job_gres_iter;
  3282. *step_gres_list = NULL;
  3283. if ((req_config == NULL) || (req_config[0] == '\0'))
  3284. return SLURM_SUCCESS;
  3285. if ((rc = gres_plugin_init()) != SLURM_SUCCESS)
  3286. return rc;
  3287. slurm_mutex_lock(&gres_context_lock);
  3288. tmp_str = xstrdup(req_config);
  3289. tok = strtok_r(tmp_str, ",", &last);
  3290. while (tok && (rc == SLURM_SUCCESS)) {
  3291. rc2 = SLURM_ERROR;
  3292. for (i=0; i<gres_context_cnt; i++) {
  3293. rc2 = _step_state_validate(tok, &step_gres_data,
  3294. &gres_context[i]);
  3295. if ((rc2 != SLURM_SUCCESS) || (step_gres_data == NULL))
  3296. continue;
  3297. if (job_gres_list == NULL) {
  3298. info("step %u.%u has gres spec, job has none",
  3299. job_id, step_id);
  3300. rc2 = ESLURM_INVALID_GRES;
  3301. continue;
  3302. }
  3303. /* Now make sure the step's request isn't too big for
  3304. * the job's gres allocation */
  3305. job_gres_iter = list_iterator_create(job_gres_list);
  3306. if (job_gres_iter == NULL)
  3307. fatal("list_iterator_create: malloc failure");
  3308. while ((job_gres_ptr = (gres_state_t *)
  3309. list_next(job_gres_iter))) {
  3310. if (job_gres_ptr->plugin_id ==
  3311. gres_context[i].plugin_id)
  3312. break;
  3313. }
  3314. list_iterator_destroy(job_gres_iter);
  3315. if (job_gres_ptr == NULL) {
  3316. info("Step %u.%u gres request not in job "
  3317. "alloc %s", job_id, step_id, tok);
  3318. rc = ESLURM_INVALID_GRES;
  3319. _step_state_delete(step_gres_data);
  3320. break;
  3321. }
  3322. job_gres_data = job_gres_ptr->gres_data;
  3323. rc3 = _step_test(step_gres_data, job_gres_data, NO_VAL,
  3324. true, gres_context[i].gres_name,
  3325. job_id, step_id);
  3326. if (rc3 == 0) {
  3327. info("Step %u.%u gres higher than in job "
  3328. "allocation %s", job_id, step_id, tok);
  3329. rc = ESLURM_INVALID_GRES;
  3330. _step_state_delete(step_gres_data);
  3331. break;
  3332. }
  3333. if (*step_gres_list == NULL) {
  3334. *step_gres_list = list_create(
  3335. _gres_step_list_delete);
  3336. if (*step_gres_list == NULL)
  3337. fatal("list_create malloc failure");
  3338. }
  3339. step_gres_ptr = xmalloc(sizeof(gres_state_t));
  3340. step_gres_ptr->plugin_id = gres_context[i].plugin_id;
  3341. step_gres_ptr->gres_data = step_gres_data;
  3342. list_append(*step_gres_list, step_gres_ptr);
  3343. break; /* processed it */
  3344. }
  3345. if (rc2 != SLURM_SUCCESS) {
  3346. info("Invalid gres step %u.%u specification %s",
  3347. job_id, step_id, tok);
  3348. rc = ESLURM_INVALID_GRES;
  3349. break;
  3350. }
  3351. tok = strtok_r(NULL, ",", &last);
  3352. }
  3353. slurm_mutex_unlock(&gres_context_lock);
  3354. xfree(tmp_str);
  3355. return rc;
  3356. }
  3357. static void *_step_state_dup(void *gres_data)
  3358. {
  3359. int i;
  3360. gres_step_state_t *gres_ptr = (gres_step_state_t *) gres_data;
  3361. gres_step_state_t *new_gres_ptr;
  3362. xassert(gres_ptr);
  3363. new_gres_ptr = xmalloc(sizeof(gres_step_state_t));
  3364. new_gres_ptr->gres_cnt_alloc = gres_ptr->gres_cnt_alloc;
  3365. new_gres_ptr->node_cnt = gres_ptr->node_cnt;
  3366. if (gres_ptr->node_in_use)
  3367. new_gres_ptr->node_in_use = bit_copy(gres_ptr->node_in_use);
  3368. if (gres_ptr->gres_bit_alloc) {
  3369. new_gres_ptr->gres_bit_alloc = xmalloc(sizeof(bitstr_t *) *
  3370. gres_ptr->node_cnt);
  3371. for (i=0; i<gres_ptr->node_cnt; i++) {
  3372. if (gres_ptr->gres_bit_alloc[i] == NULL)
  3373. continue;
  3374. new_gres_ptr->gres_bit_alloc[i] = bit_copy(gres_ptr->
  3375. gres_bit_alloc[i]);
  3376. }
  3377. }
  3378. return new_gres_ptr;
  3379. }
  3380. static void *_step_state_dup2(void *gres_data, int node_index)
  3381. {
  3382. gres_step_state_t *gres_ptr = (gres_step_state_t *) gres_data;
  3383. gres_step_state_t *new_gres_ptr;
  3384. xassert(gres_ptr);
  3385. new_gres_ptr = xmalloc(sizeof(gres_step_state_t));
  3386. new_gres_ptr->gres_cnt_alloc = gres_ptr->gres_cnt_alloc;
  3387. new_gres_ptr->node_cnt = 1;
  3388. if (gres_ptr->node_in_use)
  3389. new_gres_ptr->node_in_use = bit_copy(gres_ptr->node_in_use);
  3390. if ((node_index < gres_ptr->node_cnt) && gres_ptr->gres_bit_alloc &&
  3391. gres_ptr->gres_bit_alloc[node_index]) {
  3392. new_gres_ptr->gres_bit_alloc = xmalloc(sizeof(bitstr_t *));
  3393. new_gres_ptr->gres_bit_alloc[0] =
  3394. bit_copy(gres_ptr->gres_bit_alloc[node_index]);
  3395. }
  3396. return new_gres_ptr;
  3397. }
  3398. /*
  3399. * Create a copy of a step's gres state
  3400. * IN gres_list - List of Gres records for this step to track usage
  3401. * RET The copy or NULL on failure
  3402. */
  3403. List gres_plugin_step_state_dup(List gres_list)
  3404. {
  3405. return gres_plugin_step_state_extract(gres_list, -1);
  3406. }
  3407. /*
  3408. * Create a copy of a step's gres state for a particular node index
  3409. * IN gres_list - List of Gres records for this step to track usage
  3410. * IN node_index - zero-origin index to the node
  3411. * RET The copy or NULL on failure
  3412. */
  3413. List gres_plugin_step_state_extract(List gres_list, int node_index)
  3414. {
  3415. ListIterator gres_iter;
  3416. gres_state_t *gres_ptr, *new_gres_state;
  3417. List new_gres_list = NULL;
  3418. void *new_gres_data;
  3419. if (gres_list == NULL)
  3420. return new_gres_list;
  3421. (void) gres_plugin_init();
  3422. slurm_mutex_lock(&gres_context_lock);
  3423. gres_iter = list_iterator_create(gres_list);
  3424. if (!gres_iter)
  3425. fatal("list_iterator_create: malloc failure");
  3426. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  3427. if (node_index == -1)
  3428. new_gres_data = _step_state_dup(gres_ptr->gres_data);
  3429. else {
  3430. new_gres_data = _step_state_dup2(gres_ptr->gres_data,
  3431. node_index);
  3432. }
  3433. if (new_gres_list == NULL) {
  3434. new_gres_list = list_create(_gres_step_list_delete);
  3435. if (new_gres_list == NULL)
  3436. fatal("list_create: malloc failure");
  3437. }
  3438. new_gres_state = xmalloc(sizeof(gres_state_t));
  3439. new_gres_state->plugin_id = gres_ptr->plugin_id;
  3440. new_gres_state->gres_data = new_gres_data;
  3441. list_append(new_gres_list, new_gres_state);
  3442. }
  3443. list_iterator_destroy(gres_iter);
  3444. slurm_mutex_unlock(&gres_context_lock);
  3445. return new_gres_list;
  3446. }
  3447. /*
  3448. * A job allocation size has changed. Update the job step gres information
  3449. * bitmaps and other data structures.
  3450. * IN gres_list - List of Gres records for this step to track usage
  3451. * IN orig_job_node_bitmap - bitmap of nodes in the original job allocation
  3452. * IN new_job_node_bitmap - bitmap of nodes in the new job allocation
  3453. */
  3454. void gres_plugin_step_state_rebase(List gres_list,
  3455. bitstr_t *orig_job_node_bitmap,
  3456. bitstr_t *new_job_node_bitmap)
  3457. {
  3458. ListIterator gres_iter;
  3459. gres_state_t *gres_ptr;
  3460. gres_step_state_t *gres_step_ptr;
  3461. int new_node_cnt;
  3462. int i_first, i_last, i;
  3463. int old_inx, new_inx;
  3464. bitstr_t *new_node_in_use;
  3465. bitstr_t **new_gres_bit_alloc = NULL;
  3466. if (gres_list == NULL)
  3467. return;
  3468. (void) gres_plugin_init();
  3469. slurm_mutex_lock(&gres_context_lock);
  3470. gres_iter = list_iterator_create(gres_list);
  3471. if (!gres_iter)
  3472. fatal("list_iterator_create: malloc failure");
  3473. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  3474. gres_step_ptr = (gres_step_state_t *) gres_ptr->gres_data;
  3475. if (!gres_step_ptr)
  3476. continue;
  3477. if (!gres_step_ptr->node_in_use) {
  3478. error("gres_plugin_step_state_rebase: node_in_use is "
  3479. "NULL");
  3480. continue;
  3481. }
  3482. new_node_cnt = bit_set_count(new_job_node_bitmap);
  3483. i_first = MIN(bit_ffs(orig_job_node_bitmap),
  3484. bit_ffs(new_job_node_bitmap));
  3485. i_first = MAX(i_first, 0);
  3486. i_last = MAX(bit_fls(orig_job_node_bitmap),
  3487. bit_fls(new_job_node_bitmap));
  3488. if (i_last == -1) {
  3489. error("gres_plugin_step_state_rebase: node_bitmaps "
  3490. "are empty");
  3491. continue;
  3492. }
  3493. new_node_in_use = bit_alloc(new_node_cnt);
  3494. if (!new_node_in_use)
  3495. fatal("bit_alloc: malloc failure");
  3496. old_inx = new_inx = -1;
  3497. for (i = i_first; i <= i_last; i++) {
  3498. bool old_match = false, new_match = false;
  3499. if (bit_test(orig_job_node_bitmap, i)) {
  3500. old_match = true;
  3501. old_inx++;
  3502. }
  3503. if (bit_test(new_job_node_bitmap, i)) {
  3504. new_match = true;
  3505. new_inx++;
  3506. }
  3507. if (old_match && new_match) {
  3508. bit_set(new_node_in_use, new_inx);
  3509. if (gres_step_ptr->gres_bit_alloc) {
  3510. if (!new_gres_bit_alloc) {
  3511. new_gres_bit_alloc =
  3512. xmalloc(
  3513. sizeof(bitstr_t *) *
  3514. new_node_cnt);
  3515. }
  3516. new_gres_bit_alloc[new_inx] =
  3517. gres_step_ptr->gres_bit_alloc[old_inx];
  3518. }
  3519. } else if (old_match &&
  3520. gres_step_ptr->gres_bit_alloc &&
  3521. gres_step_ptr->gres_bit_alloc[old_inx]) {
  3522. /* Node removed from job allocation,
  3523. * release step's resources */
  3524. bit_free(gres_step_ptr->
  3525. gres_bit_alloc[old_inx]);
  3526. }
  3527. }
  3528. gres_step_ptr->node_cnt = new_node_cnt;
  3529. bit_free(gres_step_ptr->node_in_use);
  3530. gres_step_ptr->node_in_use = new_node_in_use;
  3531. xfree(gres_step_ptr->gres_bit_alloc);
  3532. gres_step_ptr->gres_bit_alloc = new_gres_bit_alloc;
  3533. }
  3534. list_iterator_destroy(gres_iter);
  3535. slurm_mutex_unlock(&gres_context_lock);
  3536. return;
  3537. }
  3538. /*
  3539. * Pack a step's current gres status, called from slurmctld for save/restore
  3540. * IN gres_list - generated by gres_plugin_step_allocate()
  3541. * IN/OUT buffer - location to write state to
  3542. * IN job_id, step_id - job and step ID for logging
  3543. */
  3544. extern int gres_plugin_step_state_pack(List gres_list, Buf buffer,
  3545. uint32_t job_id, uint32_t step_id,
  3546. uint16_t protocol_version)
  3547. {
  3548. int i, rc = SLURM_SUCCESS;
  3549. uint32_t top_offset, tail_offset, magic = GRES_MAGIC;
  3550. uint16_t rec_cnt = 0;
  3551. ListIterator gres_iter;
  3552. gres_state_t *gres_ptr;
  3553. gres_step_state_t *gres_step_ptr;
  3554. top_offset = get_buf_offset(buffer);
  3555. pack16(rec_cnt, buffer); /* placeholder if data */
  3556. if (gres_list == NULL)
  3557. return rc;
  3558. (void) gres_plugin_init();
  3559. slurm_mutex_lock(&gres_context_lock);
  3560. gres_iter = list_iterator_create(gres_list);
  3561. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  3562. gres_step_ptr = (gres_step_state_t *) gres_ptr->gres_data;
  3563. if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  3564. pack32(magic, buffer);
  3565. pack32(gres_ptr->plugin_id, buffer);
  3566. pack32(gres_step_ptr->gres_cnt_alloc, buffer);
  3567. pack32(gres_step_ptr->node_cnt, buffer);
  3568. pack_bit_str(gres_step_ptr->node_in_use, buffer);
  3569. if (gres_step_ptr->gres_bit_alloc) {
  3570. pack8((uint8_t) 1, buffer);
  3571. for (i=0; i<gres_step_ptr->node_cnt; i++)
  3572. pack_bit_str(gres_step_ptr->
  3573. gres_bit_alloc[i],
  3574. buffer);
  3575. } else {
  3576. pack8((uint8_t) 0, buffer);
  3577. }
  3578. rec_cnt++;
  3579. } else {
  3580. error("gres_plugin_step_state_pack: protocol_version "
  3581. "%hu not supported", protocol_version);
  3582. break;
  3583. }
  3584. }
  3585. list_iterator_destroy(gres_iter);
  3586. slurm_mutex_unlock(&gres_context_lock);
  3587. tail_offset = get_buf_offset(buffer);
  3588. set_buf_offset(buffer, top_offset);
  3589. pack16(rec_cnt, buffer);
  3590. set_buf_offset(buffer, tail_offset);
  3591. return rc;
  3592. }
  3593. /*
  3594. * Unpack a step's current gres status, called from slurmctld for save/restore
  3595. * OUT gres_list - restored state stored by gres_plugin_step_state_pack()
  3596. * IN/OUT buffer - location to read state from
  3597. * IN job_id, step_id - job and step ID for logging
  3598. */
  3599. extern int gres_plugin_step_state_unpack(List *gres_list, Buf buffer,
  3600. uint32_t job_id, uint32_t step_id,
  3601. uint16_t protocol_version)
  3602. {
  3603. int i, rc;
  3604. uint32_t magic, plugin_id;
  3605. uint16_t rec_cnt;
  3606. uint8_t has_file;
  3607. gres_state_t *gres_ptr;
  3608. gres_step_state_t *gres_step_ptr = NULL;
  3609. safe_unpack16(&rec_cnt, buffer);
  3610. if (rec_cnt == 0)
  3611. return SLURM_SUCCESS;
  3612. rc = gres_plugin_init();
  3613. slurm_mutex_lock(&gres_context_lock);
  3614. if ((gres_context_cnt > 0) && (*gres_list == NULL)) {
  3615. *gres_list = list_create(_gres_step_list_delete);
  3616. if (*gres_list == NULL)
  3617. fatal("list_create malloc failure");
  3618. }
  3619. while ((rc == SLURM_SUCCESS) && (rec_cnt)) {
  3620. if ((buffer == NULL) || (remaining_buf(buffer) == 0))
  3621. break;
  3622. rec_cnt--;
  3623. if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  3624. safe_unpack32(&magic, buffer);
  3625. if (magic != GRES_MAGIC)
  3626. goto unpack_error;
  3627. safe_unpack32(&plugin_id, buffer);
  3628. gres_step_ptr = xmalloc(sizeof(gres_step_state_t));
  3629. safe_unpack32(&gres_step_ptr->gres_cnt_alloc, buffer);
  3630. safe_unpack32(&gres_step_ptr->node_cnt, buffer);
  3631. unpack_bit_str(&gres_step_ptr->node_in_use, buffer);
  3632. safe_unpack8(&has_file, buffer);
  3633. if (has_file) {
  3634. gres_step_ptr->gres_bit_alloc =
  3635. xmalloc(sizeof(bitstr_t *) *
  3636. gres_step_ptr->node_cnt);
  3637. for (i=0; i<gres_step_ptr->node_cnt; i++) {
  3638. unpack_bit_str(&gres_step_ptr->
  3639. gres_bit_alloc[i],
  3640. buffer);
  3641. }
  3642. }
  3643. } else {
  3644. error("gres_plugin_step_state_unpack: protocol_version"
  3645. " %hu not supported", protocol_version);
  3646. goto unpack_error;
  3647. }
  3648. for (i=0; i<gres_context_cnt; i++) {
  3649. if (gres_context[i].plugin_id == plugin_id)
  3650. break;
  3651. }
  3652. if (i >= gres_context_cnt) {
  3653. /* A likely sign that GresPlugins has changed.
  3654. * Not a fatal error, skip over the data. */
  3655. info("gres_plugin_step_state_unpack: no plugin "
  3656. "configured to unpack data type %u from "
  3657. "step %u.%u",
  3658. plugin_id, job_id, step_id);
  3659. _step_state_delete(gres_step_ptr);
  3660. gres_step_ptr = NULL;
  3661. continue;
  3662. }
  3663. gres_ptr = xmalloc(sizeof(gres_state_t));
  3664. gres_ptr->plugin_id = gres_context[i].plugin_id;
  3665. gres_ptr->gres_data = gres_step_ptr;
  3666. gres_step_ptr = NULL;
  3667. list_append(*gres_list, gres_ptr);
  3668. }
  3669. slurm_mutex_unlock(&gres_context_lock);
  3670. return rc;
  3671. unpack_error:
  3672. error("gres_plugin_step_state_unpack: unpack error from step %u.%u",
  3673. job_id, step_id);
  3674. if (gres_step_ptr)
  3675. _step_state_delete(gres_step_ptr);
  3676. slurm_mutex_unlock(&gres_context_lock);
  3677. return SLURM_ERROR;
  3678. }
  3679. /*
  3680. * Set environment variables as required for all tasks of a job step
  3681. * IN/OUT job_env_ptr - environment variable array
  3682. * IN gres_list - generated by gres_plugin_step_allocate()
  3683. */
  3684. extern void gres_plugin_step_set_env(char ***job_env_ptr, List step_gres_list)
  3685. {
  3686. int i;
  3687. ListIterator gres_iter;
  3688. gres_state_t *gres_ptr = NULL;
  3689. (void) gres_plugin_init();
  3690. slurm_mutex_lock(&gres_context_lock);
  3691. for (i=0; i<gres_context_cnt; i++) {
  3692. if (gres_context[i].ops.step_set_env == NULL)
  3693. continue; /* No plugin to call */
  3694. if (step_gres_list) {
  3695. gres_iter = list_iterator_create(step_gres_list);
  3696. while ((gres_ptr = (gres_state_t *)
  3697. list_next(gres_iter))) {
  3698. if (gres_ptr->plugin_id !=
  3699. gres_context[i].plugin_id)
  3700. continue;
  3701. (*(gres_context[i].ops.step_set_env))
  3702. (job_env_ptr, gres_ptr->gres_data);
  3703. break;
  3704. }
  3705. list_iterator_destroy(gres_iter);
  3706. }
  3707. if (gres_ptr == NULL) { /* No data found */
  3708. (*(gres_context[i].ops.step_set_env))
  3709. (job_env_ptr, NULL);
  3710. }
  3711. }
  3712. slurm_mutex_unlock(&gres_context_lock);
  3713. }
  3714. static void _step_state_log(void *gres_data, uint32_t job_id, uint32_t step_id,
  3715. char *gres_name)
  3716. {
  3717. gres_step_state_t *gres_ptr = (gres_step_state_t *) gres_data;
  3718. char tmp_str[128];
  3719. int i;
  3720. xassert(gres_ptr);
  3721. info("gres/%s state for step %u.%u", gres_name, job_id, step_id);
  3722. info(" gres_cnt:%u node_cnt:%u", gres_ptr->gres_cnt_alloc,
  3723. gres_ptr->node_cnt);
  3724. if (gres_ptr->node_in_use == NULL)
  3725. info(" node_in_use:NULL");
  3726. else if (gres_ptr->gres_bit_alloc == NULL)
  3727. info(" gres_bit_alloc:NULL");
  3728. else {
  3729. for (i=0; i<gres_ptr->node_cnt; i++) {
  3730. if (!bit_test(gres_ptr->node_in_use, i))
  3731. continue;
  3732. if (gres_ptr->gres_bit_alloc[i]) {
  3733. bit_fmt(tmp_str, sizeof(tmp_str),
  3734. gres_ptr->gres_bit_alloc[i]);
  3735. info(" gres_bit_alloc[%d]:%s", i, tmp_str);
  3736. } else
  3737. info(" gres_bit_alloc[%d]:NULL", i);
  3738. }
  3739. }
  3740. }
  3741. /*
  3742. * Log a step's current gres state
  3743. * IN gres_list - generated by gres_plugin_step_allocate()
  3744. * IN job_id - job's ID
  3745. */
  3746. extern void gres_plugin_step_state_log(List gres_list, uint32_t job_id,
  3747. uint32_t step_id)
  3748. {
  3749. int i;
  3750. ListIterator gres_iter;
  3751. gres_state_t *gres_ptr;
  3752. if (!gres_debug || (gres_list == NULL))
  3753. return;
  3754. (void) gres_plugin_init();
  3755. slurm_mutex_lock(&gres_context_lock);
  3756. gres_iter = list_iterator_create(gres_list);
  3757. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))) {
  3758. for (i=0; i<gres_context_cnt; i++) {
  3759. if (gres_ptr->plugin_id != gres_context[i].plugin_id)
  3760. continue;
  3761. _step_state_log(gres_ptr->gres_data, job_id, step_id,
  3762. gres_context[i].gres_name);
  3763. break;
  3764. }
  3765. }
  3766. list_iterator_destroy(gres_iter);
  3767. slurm_mutex_unlock(&gres_context_lock);
  3768. }
  3769. /*
  3770. * Determine how many CPUs of a job's allocation can be allocated to a job
  3771. * on a specific node
  3772. * IN job_gres_list - a running job's gres info
  3773. * IN/OUT step_gres_list - a pending job step's gres requirements
  3774. * IN node_offset - index into the job's node allocation
  3775. * IN ignore_alloc - if set ignore resources already allocated to running steps
  3776. * IN job_id, step_id - ID of the step being allocated.
  3777. * RET Count of available CPUs on this node, NO_VAL if no limit
  3778. */
  3779. extern uint32_t gres_plugin_step_test(List step_gres_list, List job_gres_list,
  3780. int node_offset, bool ignore_alloc,
  3781. uint32_t job_id, uint32_t step_id)
  3782. {
  3783. int i;
  3784. uint32_t cpu_cnt, tmp_cnt;
  3785. ListIterator job_gres_iter, step_gres_iter;
  3786. gres_state_t *job_gres_ptr, *step_gres_ptr;
  3787. if (step_gres_list == NULL)
  3788. return NO_VAL;
  3789. if (job_gres_list == NULL)
  3790. return 0;
  3791. cpu_cnt = NO_VAL;
  3792. (void) gres_plugin_init();
  3793. slurm_mutex_lock(&gres_context_lock);
  3794. step_gres_iter = list_iterator_create(step_gres_list);
  3795. while ((step_gres_ptr = (gres_state_t *) list_next(step_gres_iter))) {
  3796. job_gres_iter = list_iterator_create(job_gres_list);
  3797. while ((job_gres_ptr = (gres_state_t *)
  3798. list_next(job_gres_iter))) {
  3799. if (step_gres_ptr->plugin_id == job_gres_ptr->plugin_id)
  3800. break;
  3801. }
  3802. list_iterator_destroy(job_gres_iter);
  3803. if (job_gres_ptr == NULL) {
  3804. /* job lack resources required by the step */
  3805. cpu_cnt = 0;
  3806. break;
  3807. }
  3808. for (i=0; i<gres_context_cnt; i++) {
  3809. if (step_gres_ptr->plugin_id !=
  3810. gres_context[i].plugin_id)
  3811. continue;
  3812. tmp_cnt = _step_test(step_gres_ptr->gres_data,
  3813. job_gres_ptr->gres_data,
  3814. node_offset, ignore_alloc,
  3815. gres_context[i].gres_name,
  3816. job_id, step_id);
  3817. if (tmp_cnt != NO_VAL) {
  3818. if (cpu_cnt == NO_VAL)
  3819. cpu_cnt = tmp_cnt;
  3820. else
  3821. cpu_cnt = MIN(tmp_cnt, cpu_cnt);
  3822. }
  3823. break;
  3824. }
  3825. if (cpu_cnt == 0)
  3826. break;
  3827. }
  3828. list_iterator_destroy(step_gres_iter);
  3829. slurm_mutex_unlock(&gres_context_lock);
  3830. return cpu_cnt;
  3831. }
  3832. static int _step_alloc(void *step_gres_data, void *job_gres_data,
  3833. int node_offset, int cpu_cnt, char *gres_name,
  3834. uint32_t job_id, uint32_t step_id)
  3835. {
  3836. gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data;
  3837. gres_step_state_t *step_gres_ptr = (gres_step_state_t *) step_gres_data;
  3838. uint32_t gres_needed;
  3839. bitstr_t *gres_bit_alloc;
  3840. int i, len;
  3841. xassert(job_gres_ptr);
  3842. xassert(step_gres_ptr);
  3843. if (node_offset >= job_gres_ptr->node_cnt) {
  3844. error("gres/%s: step_alloc for %u.%u, node offset invalid "
  3845. "(%d >= %u)",
  3846. gres_name, job_id, step_id, node_offset,
  3847. job_gres_ptr->node_cnt);
  3848. return SLURM_ERROR;
  3849. }
  3850. if (step_gres_ptr->gres_cnt_alloc > job_gres_ptr->gres_cnt_alloc) {
  3851. error("gres/%s: step_alloc for %u.%u, step's > job's "
  3852. "for node %d (%d > %u)",
  3853. gres_name, job_id, step_id, node_offset,
  3854. step_gres_ptr->gres_cnt_alloc,
  3855. job_gres_ptr->gres_cnt_alloc);
  3856. return SLURM_ERROR;
  3857. }
  3858. if (job_gres_ptr->gres_cnt_step_alloc == NULL) {
  3859. job_gres_ptr->gres_cnt_step_alloc =
  3860. xmalloc(sizeof(uint32_t) * job_gres_ptr->node_cnt);
  3861. }
  3862. if (step_gres_ptr->gres_cnt_alloc >
  3863. (job_gres_ptr->gres_cnt_alloc -
  3864. job_gres_ptr->gres_cnt_step_alloc[node_offset])) {
  3865. error("gres/%s: step_alloc for %u.%u, step's > job's "
  3866. "remaining for node %d (%d > (%u - %u))",
  3867. gres_name, job_id, step_id, node_offset,
  3868. step_gres_ptr->gres_cnt_alloc,
  3869. job_gres_ptr->gres_cnt_alloc,
  3870. job_gres_ptr->gres_cnt_step_alloc[node_offset]);
  3871. return SLURM_ERROR;
  3872. }
  3873. step_gres_ptr->node_cnt = job_gres_ptr->node_cnt;
  3874. if (step_gres_ptr->node_in_use == NULL) {
  3875. step_gres_ptr->node_in_use = bit_alloc(job_gres_ptr->node_cnt);
  3876. if (step_gres_ptr->node_in_use == NULL)
  3877. fatal("bit_alloc malloc failure");
  3878. }
  3879. bit_set(step_gres_ptr->node_in_use, node_offset);
  3880. job_gres_ptr->gres_cnt_step_alloc[node_offset] +=
  3881. step_gres_ptr->gres_cnt_alloc;
  3882. if ((job_gres_ptr->gres_bit_alloc == NULL) ||
  3883. (job_gres_ptr->gres_bit_alloc[node_offset] == NULL)) {
  3884. debug("gres/%s: step_alloc gres_bit_alloc for %u.%u is NULL",
  3885. gres_name, job_id, step_id);
  3886. return SLURM_SUCCESS;
  3887. }
  3888. gres_bit_alloc = bit_copy(job_gres_ptr->gres_bit_alloc[node_offset]);
  3889. if (gres_bit_alloc == NULL)
  3890. fatal("bit_copy malloc failure");
  3891. if (job_gres_ptr->gres_bit_step_alloc &&
  3892. job_gres_ptr->gres_bit_step_alloc[node_offset]) {
  3893. bit_not(job_gres_ptr->gres_bit_step_alloc[node_offset]);
  3894. bit_and(gres_bit_alloc,
  3895. job_gres_ptr->gres_bit_step_alloc[node_offset]);
  3896. bit_not(job_gres_ptr->gres_bit_step_alloc[node_offset]);
  3897. }
  3898. gres_needed = step_gres_ptr->gres_cnt_alloc;
  3899. len = bit_size(gres_bit_alloc);
  3900. for (i=0; i<len; i++) {
  3901. if (gres_needed > 0) {
  3902. if (bit_test(gres_bit_alloc, i))
  3903. gres_needed--;
  3904. } else {
  3905. bit_clear(gres_bit_alloc, i);
  3906. }
  3907. }
  3908. if (gres_needed) {
  3909. error("gres/%s: step %u.%u oversubscribed resources on node %d",
  3910. gres_name, job_id, step_id, node_offset);
  3911. }
  3912. if (job_gres_ptr->gres_bit_step_alloc == NULL) {
  3913. job_gres_ptr->gres_bit_step_alloc =
  3914. xmalloc(sizeof(bitstr_t *) * job_gres_ptr->node_cnt);
  3915. }
  3916. if (job_gres_ptr->gres_bit_step_alloc[node_offset]) {
  3917. bit_or(job_gres_ptr->gres_bit_step_alloc[node_offset],
  3918. gres_bit_alloc);
  3919. } else {
  3920. job_gres_ptr->gres_bit_step_alloc[node_offset] =
  3921. bit_copy(gres_bit_alloc);
  3922. }
  3923. if (step_gres_ptr->gres_bit_alloc == NULL) {
  3924. step_gres_ptr->gres_bit_alloc = xmalloc(sizeof(bitstr_t *) *
  3925. job_gres_ptr->node_cnt);
  3926. }
  3927. if (step_gres_ptr->gres_bit_alloc[node_offset]) {
  3928. error("gres/%s: step %u.%u bit_alloc already exists",
  3929. gres_name, job_id, step_id);
  3930. bit_or(step_gres_ptr->gres_bit_alloc[node_offset],
  3931. gres_bit_alloc);
  3932. FREE_NULL_BITMAP(gres_bit_alloc);
  3933. } else {
  3934. step_gres_ptr->gres_bit_alloc[node_offset] = gres_bit_alloc;
  3935. }
  3936. return SLURM_SUCCESS;
  3937. }
  3938. /*
  3939. * Allocate resource to a step and update job and step gres information
  3940. * IN step_gres_list - step's gres_list built by
  3941. * gres_plugin_step_state_validate()
  3942. * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
  3943. * IN node_offset - job's zero-origin index to the node of interest
  3944. * IN cpu_cnt - number of CPUs allocated to this job on this node
  3945. * IN job_id, step_id - ID of the step being allocated.
  3946. * RET SLURM_SUCCESS or error code
  3947. */
  3948. extern int gres_plugin_step_alloc(List step_gres_list, List job_gres_list,
  3949. int node_offset, int cpu_cnt,
  3950. uint32_t job_id, uint32_t step_id)
  3951. {
  3952. int i, rc, rc2;
  3953. ListIterator step_gres_iter, job_gres_iter;
  3954. gres_state_t *step_gres_ptr, *job_gres_ptr;
  3955. if (step_gres_list == NULL)
  3956. return SLURM_SUCCESS;
  3957. if (job_gres_list == NULL) {
  3958. error("gres_plugin_step_alloc: step allocates gres, but job "
  3959. "%u has none", job_id);
  3960. return SLURM_ERROR;
  3961. }
  3962. rc = gres_plugin_init();
  3963. slurm_mutex_lock(&gres_context_lock);
  3964. step_gres_iter = list_iterator_create(step_gres_list);
  3965. while ((step_gres_ptr = (gres_state_t *) list_next(step_gres_iter))) {
  3966. for (i=0; i<gres_context_cnt; i++) {
  3967. if (step_gres_ptr->plugin_id ==
  3968. gres_context[i].plugin_id)
  3969. break;
  3970. }
  3971. if (i >= gres_context_cnt) {
  3972. error("gres: step_alloc, could not find plugin %u for "
  3973. "step %u.%u",
  3974. step_gres_ptr->plugin_id, job_id, step_id);
  3975. rc = ESLURM_INVALID_GRES;
  3976. break;
  3977. }
  3978. job_gres_iter = list_iterator_create(job_gres_list);
  3979. while ((job_gres_ptr = (gres_state_t *)
  3980. list_next(job_gres_iter))) {
  3981. if (step_gres_ptr->plugin_id == job_gres_ptr->plugin_id)
  3982. break;
  3983. }
  3984. list_iterator_destroy(job_gres_iter);
  3985. if (job_gres_ptr == NULL) {
  3986. info("gres: job %u lacks gres/%s for step %u",
  3987. job_id, gres_context[i].gres_name, step_id);
  3988. rc = ESLURM_INVALID_GRES;
  3989. break;
  3990. }
  3991. rc2 = _step_alloc(step_gres_ptr->gres_data,
  3992. job_gres_ptr->gres_data, node_offset,
  3993. cpu_cnt, gres_context[i].gres_name, job_id,
  3994. step_id);
  3995. if (rc2 != SLURM_SUCCESS)
  3996. rc = rc2;
  3997. }
  3998. list_iterator_destroy(step_gres_iter);
  3999. slurm_mutex_unlock(&gres_context_lock);
  4000. return rc;
  4001. }
  4002. static int _step_dealloc(void *step_gres_data, void *job_gres_data,
  4003. char *gres_name, uint32_t job_id, uint32_t step_id)
  4004. {
  4005. gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data;
  4006. gres_step_state_t *step_gres_ptr = (gres_step_state_t *) step_gres_data;
  4007. uint32_t i, j, node_cnt;
  4008. int len_j, len_s;
  4009. xassert(job_gres_ptr);
  4010. xassert(step_gres_ptr);
  4011. if (step_gres_ptr->node_in_use == NULL) {
  4012. error("gres/%s: step %u.%u dealloc, node_in_use is NULL",
  4013. gres_name, job_id, step_id);
  4014. return SLURM_ERROR;
  4015. }
  4016. node_cnt = MIN(job_gres_ptr->node_cnt, step_gres_ptr->node_cnt);
  4017. for (i=0; i<node_cnt; i++) {
  4018. if (!bit_test(step_gres_ptr->node_in_use, i))
  4019. continue;
  4020. if (job_gres_ptr->gres_cnt_step_alloc) {
  4021. if (job_gres_ptr->gres_cnt_step_alloc[i] >=
  4022. step_gres_ptr->gres_cnt_alloc) {
  4023. job_gres_ptr->gres_cnt_step_alloc[i] -=
  4024. step_gres_ptr->gres_cnt_alloc;
  4025. } else {
  4026. error("gres/%s: step %u.%u dealloc count "
  4027. "underflow",
  4028. gres_name, job_id, step_id);
  4029. job_gres_ptr->gres_cnt_step_alloc[i] = 0;
  4030. }
  4031. }
  4032. if ((step_gres_ptr->gres_bit_alloc == NULL) ||
  4033. (step_gres_ptr->gres_bit_alloc[i] == NULL))
  4034. continue;
  4035. if (job_gres_ptr->gres_bit_alloc[i] == NULL) {
  4036. error("gres/%s: step dealloc, job %u gres_bit_alloc[%d]"
  4037. " is NULL", gres_name, job_id, i);
  4038. continue;
  4039. }
  4040. len_j = bit_size(job_gres_ptr->gres_bit_alloc[i]);
  4041. len_s = bit_size(step_gres_ptr->gres_bit_alloc[i]);
  4042. if (len_j != len_s) {
  4043. error("gres/%s: step %u.%u dealloc, bit_alloc[%d] size "
  4044. "mis-match (%d != %d)",
  4045. gres_name, job_id, step_id, i, len_j, len_s);
  4046. len_j = MIN(len_j, len_s);
  4047. }
  4048. for (j=0; j<len_j; j++) {
  4049. if (!bit_test(step_gres_ptr->gres_bit_alloc[i], j))
  4050. continue;
  4051. if (job_gres_ptr->gres_bit_step_alloc &&
  4052. job_gres_ptr->gres_bit_step_alloc[i]) {
  4053. bit_clear(job_gres_ptr->gres_bit_step_alloc[i],
  4054. j);
  4055. }
  4056. }
  4057. FREE_NULL_BITMAP(step_gres_ptr->gres_bit_alloc[i]);
  4058. }
  4059. return SLURM_SUCCESS;
  4060. }
  4061. /*
  4062. * Deallocate resource to a step and update job and step gres information
  4063. * IN step_gres_list - step's gres_list built by
  4064. * gres_plugin_step_state_validate()
  4065. * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
  4066. * IN job_id, step_id - ID of the step being allocated.
  4067. * RET SLURM_SUCCESS or error code
  4068. */
  4069. extern int gres_plugin_step_dealloc(List step_gres_list, List job_gres_list,
  4070. uint32_t job_id, uint32_t step_id)
  4071. {
  4072. int i, rc, rc2;
  4073. ListIterator step_gres_iter, job_gres_iter;
  4074. gres_state_t *step_gres_ptr, *job_gres_ptr;
  4075. if (step_gres_list == NULL)
  4076. return SLURM_SUCCESS;
  4077. if (job_gres_list == NULL) {
  4078. error("gres_plugin_step_alloc: step deallocates gres, but job "
  4079. "%u has none", job_id);
  4080. return SLURM_ERROR;
  4081. }
  4082. rc = gres_plugin_init();
  4083. slurm_mutex_lock(&gres_context_lock);
  4084. step_gres_iter = list_iterator_create(step_gres_list);
  4085. while ((step_gres_ptr = (gres_state_t *) list_next(step_gres_iter))) {
  4086. job_gres_iter = list_iterator_create(job_gres_list);
  4087. while ((job_gres_ptr = (gres_state_t *)
  4088. list_next(job_gres_iter))) {
  4089. if (step_gres_ptr->plugin_id == job_gres_ptr->plugin_id)
  4090. break;
  4091. }
  4092. list_iterator_destroy(job_gres_iter);
  4093. if (job_gres_ptr == NULL)
  4094. continue;
  4095. for (i=0; i<gres_context_cnt; i++) {
  4096. if (step_gres_ptr->plugin_id !=
  4097. gres_context[i].plugin_id)
  4098. continue;
  4099. rc2 = _step_dealloc(step_gres_ptr->gres_data,
  4100. job_gres_ptr->gres_data,
  4101. gres_context[i].gres_name, job_id,
  4102. step_id);
  4103. if (rc2 != SLURM_SUCCESS)
  4104. rc = rc2;
  4105. break;
  4106. }
  4107. }
  4108. list_iterator_destroy(step_gres_iter);
  4109. slurm_mutex_unlock(&gres_context_lock);
  4110. return rc;
  4111. }
  4112. /*
  4113. * Determine how many GRES of a given type are allocated to a job
  4114. * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
  4115. * IN gres_name - name of a GRES type
  4116. * RET count of this GRES allocated to this job
  4117. */
  4118. extern uint32_t gres_get_value_by_type(List job_gres_list, char* gres_name)
  4119. {
  4120. int i;
  4121. uint32_t gres_cnt = 0, plugin_id;
  4122. ListIterator job_gres_iter;
  4123. gres_state_t *job_gres_ptr;
  4124. gres_job_state_t *job_gres_data;
  4125. if (job_gres_list == NULL)
  4126. return NO_VAL;
  4127. gres_cnt = NO_VAL;
  4128. (void) gres_plugin_init();
  4129. plugin_id = _build_id(gres_name);
  4130. slurm_mutex_lock(&gres_context_lock);
  4131. job_gres_iter = list_iterator_create(job_gres_list);
  4132. if (!job_gres_iter)
  4133. fatal("list_iterator_create: malloc failure");
  4134. while ((job_gres_ptr = (gres_state_t *) list_next(job_gres_iter))) {
  4135. for (i=0; i<gres_context_cnt; i++) {
  4136. if (job_gres_ptr->plugin_id != plugin_id)
  4137. continue;
  4138. job_gres_data = (gres_job_state_t *)
  4139. job_gres_ptr->gres_data;
  4140. gres_cnt = job_gres_data->gres_cnt_alloc;
  4141. break;
  4142. }
  4143. }
  4144. list_iterator_destroy(job_gres_iter);
  4145. slurm_mutex_unlock(&gres_context_lock);
  4146. return gres_cnt;
  4147. }
  4148. /*
  4149. * Fill in an array of GRES type ids contained within the given gres_list
  4150. * and an array of corresponding counts of those GRES types.
  4151. * IN gres_list - a List of GRES types found on a node.
  4152. * IN arrlen - Length of the arrays (the number of elements in the gres_list).
  4153. * IN gres_count_ids, gres_count_vals - the GRES type ID's and values found
  4154. * in the gres_list.
  4155. * RET SLURM_SUCCESS or error code
  4156. */
  4157. extern int gres_num_gres_alloced_all(List gres_list, int arrlen,
  4158. int* gres_count_ids, int* gres_count_vals, int valtype)
  4159. {
  4160. ListIterator node_gres_iter;
  4161. gres_state_t* node_gres_ptr;
  4162. void* node_gres_data;
  4163. uint32_t val;
  4164. int rc, ix = 0;
  4165. rc = gres_plugin_init();
  4166. if ((rc == SLURM_SUCCESS) && (arrlen <= 0))
  4167. rc = EINVAL;
  4168. if (rc != SLURM_SUCCESS)
  4169. return rc;
  4170. slurm_mutex_lock(&gres_context_lock);
  4171. node_gres_iter = list_iterator_create(gres_list);
  4172. if (!node_gres_iter)
  4173. fatal("list_iterator_create: malloc failure");
  4174. while ((node_gres_ptr = (gres_state_t*) list_next(node_gres_iter))) {
  4175. gres_node_state_t *node_gres_state_ptr;
  4176. val = 0;
  4177. node_gres_data = node_gres_ptr->gres_data;
  4178. node_gres_state_ptr = (gres_node_state_t *) node_gres_data;
  4179. xassert(node_gres_state_ptr);
  4180. switch(valtype) {
  4181. case(GRES_VAL_TYPE_FOUND):
  4182. val = node_gres_state_ptr->gres_cnt_found;
  4183. break;
  4184. case(GRES_VAL_TYPE_CONFIG):
  4185. val = node_gres_state_ptr->gres_cnt_config;
  4186. break;
  4187. case(GRES_VAL_TYPE_AVAIL):
  4188. val = node_gres_state_ptr->gres_cnt_avail;
  4189. break;
  4190. case(GRES_VAL_TYPE_ALLOC):
  4191. val = node_gres_state_ptr->gres_cnt_alloc;
  4192. }
  4193. gres_count_ids[ix] = node_gres_ptr->plugin_id;
  4194. gres_count_vals[ix] = val;
  4195. if (++ix >= arrlen)
  4196. break;
  4197. }
  4198. list_iterator_destroy(node_gres_iter);
  4199. slurm_mutex_unlock(&gres_context_lock);
  4200. return rc;
  4201. }
  4202. extern void gres_plugin_step_state_file(List gres_list, int *gres_bit_alloc,
  4203. int *gres_count)
  4204. {
  4205. int i, j, p, gres_cnt = 0, len, found;
  4206. ListIterator gres_iter;
  4207. gres_state_t *gres_ptr;
  4208. gres_step_state_t *gres_step_ptr;
  4209. if (gres_list == NULL)
  4210. return;
  4211. (void) gres_plugin_init();
  4212. slurm_mutex_lock(&gres_context_lock);
  4213. gres_iter = list_iterator_create(gres_list);
  4214. if (!gres_iter)
  4215. fatal("list_iterator_create: malloc failure");
  4216. for (j=0; j<gres_context_cnt; j++) {
  4217. found = 0;
  4218. list_iterator_reset(gres_iter);
  4219. while ((gres_ptr = (gres_state_t *) list_next(gres_iter))){
  4220. if (gres_ptr->plugin_id !=
  4221. gres_context[j].plugin_id) {
  4222. continue;
  4223. }
  4224. found = 1;
  4225. gres_step_ptr = (gres_step_state_t *) gres_ptr->gres_data;
  4226. if ((gres_step_ptr != NULL) &&
  4227. (gres_step_ptr->node_cnt == 1) &&
  4228. (gres_step_ptr->gres_bit_alloc != NULL) &&
  4229. (gres_step_ptr->gres_bit_alloc[0] != NULL)) {
  4230. len = bit_size(gres_step_ptr->gres_bit_alloc[0]);
  4231. for (i=0; i<len; i++) {
  4232. if (!bit_test(gres_step_ptr->
  4233. gres_bit_alloc[0], i))
  4234. gres_bit_alloc[gres_cnt] = 0;
  4235. else
  4236. gres_bit_alloc[gres_cnt] = 1;
  4237. gres_cnt++;
  4238. }
  4239. }
  4240. break;
  4241. }
  4242. if (found == 0) {
  4243. for (p=0; p<gres_count[j]; p++){
  4244. gres_bit_alloc[gres_cnt] = 0;
  4245. gres_cnt++;
  4246. }
  4247. }
  4248. }
  4249. list_iterator_destroy(gres_iter);
  4250. slurm_mutex_unlock(&gres_context_lock);
  4251. }
  4252. /* Send GRES information to slurmstepd on the specified file descriptor*/
  4253. extern void gres_plugin_send_stepd(int fd)
  4254. {
  4255. int i;
  4256. (void) gres_plugin_init();
  4257. slurm_mutex_lock(&gres_context_lock);
  4258. for (i = 0; i < gres_context_cnt; i++) {
  4259. if (gres_context[i].ops.send_stepd == NULL)
  4260. continue; /* No plugin to call */
  4261. (*(gres_context[i].ops.send_stepd)) (fd);
  4262. break;
  4263. }
  4264. slurm_mutex_unlock(&gres_context_lock);
  4265. }
  4266. /* Receive GRES information from slurmd on the specified file descriptor*/
  4267. extern void gres_plugin_recv_stepd(int fd)
  4268. {
  4269. int i;
  4270. (void) gres_plugin_init();
  4271. slurm_mutex_lock(&gres_context_lock);
  4272. for (i = 0; i < gres_context_cnt; i++) {
  4273. if (gres_context[i].ops.recv_stepd == NULL)
  4274. continue; /* No plugin to call */
  4275. (*(gres_context[i].ops.recv_stepd)) (fd);
  4276. break;
  4277. }
  4278. slurm_mutex_unlock(&gres_context_lock);
  4279. }