PageRenderTime 59ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/src/common/job_resources.c

https://github.com/cfenoy/slurm
C | 1381 lines | 1139 code | 144 blank | 98 comment | 262 complexity | 0730b7aa02f6a146e661b869230e7fd0 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * job_resources.c - functions to manage data structure identifying specific
  3. * CPUs allocated to a job, step or partition
  4. *****************************************************************************
  5. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  6. * Written by Morris Jette <jette1@llnl.gov>.
  7. * CODE-OCEC-09-009. All rights reserved.
  8. *
  9. * This file is part of SLURM, a resource management program.
  10. * For details, see <http://www.schedmd.com/slurmdocs/>.
  11. * Please also read the included file: DISCLAIMER.
  12. *
  13. * SLURM is free software; you can redistribute it and/or modify it under
  14. * the terms of the GNU General Public License as published by the Free
  15. * Software Foundation; either version 2 of the License, or (at your option)
  16. * any later version.
  17. *
  18. * In addition, as a special exception, the copyright holders give permission
  19. * to link the code of portions of this program with the OpenSSL library under
  20. * certain conditions as described in each individual source file, and
  21. * distribute linked combinations including the two. You must obey the GNU
  22. * General Public License in all respects for all of the code used other than
  23. * OpenSSL. If you modify file(s) with this exception, you may extend this
  24. * exception to your version of the file(s), but you are not obligated to do
  25. * so. If you do not wish to do so, delete this exception statement from your
  26. * version. If you delete this exception statement from all source files in
  27. * the program, then also delete it here.
  28. *
  29. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  30. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  31. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  32. * details.
  33. *
  34. * You should have received a copy of the GNU General Public License along
  35. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  36. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  37. \*****************************************************************************/
  38. #include <stdlib.h>
  39. #include <string.h>
  40. #include "slurm/slurm_errno.h"
  41. #include "src/common/hostlist.h"
  42. #include "src/common/job_resources.h"
  43. #include "src/common/log.h"
  44. #include "src/common/pack.h"
  45. #include "src/common/xassert.h"
  46. #include "src/common/xmalloc.h"
  47. #include "src/slurmctld/slurmctld.h"
  48. /* Create an empty job_resources data structure */
  49. extern job_resources_t *create_job_resources(void)
  50. {
  51. job_resources_t *job_resrcs;
  52. job_resrcs = xmalloc(sizeof(struct job_resources));
  53. return job_resrcs;
  54. }
  55. /* Set the socket and core counts associated with a set of selected
  56. * nodes of a job_resources data structure based upon slurmctld state.
  57. * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based
  58. * upon the value of node_bitmap, also creates core_bitmap based upon
  59. * the total number of cores in the allocation). Call this ONLY from
  60. * slurmctld. Example of use:
  61. *
  62. * job_resources_t *job_resrcs_ptr = create_job_resources();
  63. * node_name2bitmap("dummy[2,5,12,16]", true, &(job_res_ptr->node_bitmap));
  64. * rc = build_job_resources(job_resrcs_ptr, node_record_table_ptr,
  65. * slurmctld_conf.fast_schedule);
  66. */
  67. extern int build_job_resources(job_resources_t *job_resrcs,
  68. void *node_rec_table, uint16_t fast_schedule)
  69. {
  70. int i, bitmap_len;
  71. int core_cnt = 0, sock_inx = -1;
  72. uint32_t cores, socks;
  73. struct node_record *node_ptr, *node_record_table;
  74. if (job_resrcs->node_bitmap == NULL) {
  75. error("build_job_resources: node_bitmap is NULL");
  76. return SLURM_ERROR;
  77. }
  78. node_record_table = (struct node_record *) node_rec_table;
  79. xfree(job_resrcs->sockets_per_node);
  80. xfree(job_resrcs->cores_per_socket);
  81. xfree(job_resrcs->sock_core_rep_count);
  82. job_resrcs->sockets_per_node = xmalloc(sizeof(uint16_t) *
  83. job_resrcs->nhosts);
  84. job_resrcs->cores_per_socket = xmalloc(sizeof(uint16_t) *
  85. job_resrcs->nhosts);
  86. job_resrcs->sock_core_rep_count = xmalloc(sizeof(uint32_t) *
  87. job_resrcs->nhosts);
  88. bitmap_len = bit_size(job_resrcs->node_bitmap);
  89. for (i=0; i<bitmap_len; i++) {
  90. if (!bit_test(job_resrcs->node_bitmap, i))
  91. continue;
  92. node_ptr = node_record_table + i;
  93. if (fast_schedule) {
  94. socks = node_ptr->config_ptr->sockets;
  95. cores = node_ptr->config_ptr->cores;
  96. } else {
  97. socks = node_ptr->sockets;
  98. cores = node_ptr->cores;
  99. }
  100. if ((sock_inx < 0) ||
  101. (socks != job_resrcs->sockets_per_node[sock_inx]) ||
  102. (cores != job_resrcs->cores_per_socket[sock_inx])) {
  103. sock_inx++;
  104. job_resrcs->sockets_per_node[sock_inx] = socks;
  105. job_resrcs->cores_per_socket[sock_inx] = cores;
  106. }
  107. job_resrcs->sock_core_rep_count[sock_inx]++;
  108. core_cnt += (cores * socks);
  109. }
  110. #ifndef HAVE_BG
  111. job_resrcs->core_bitmap = bit_alloc(core_cnt);
  112. job_resrcs->core_bitmap_used = bit_alloc(core_cnt);
  113. if ((job_resrcs->core_bitmap == NULL) ||
  114. (job_resrcs->core_bitmap_used == NULL))
  115. fatal("bit_alloc malloc failure");
  116. #endif
  117. return SLURM_SUCCESS;
  118. }
  119. /* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the
  120. * values of nhosts and cpus in an existing data structure
  121. * Return total CPU count or -1 on error */
  122. extern int build_job_resources_cpu_array(job_resources_t *job_resrcs_ptr)
  123. {
  124. int cpu_count = 0, i;
  125. uint32_t last_cpu_cnt = NO_VAL;
  126. if (job_resrcs_ptr->nhosts == 0)
  127. return cpu_count; /* no work to do */
  128. if (job_resrcs_ptr->cpus == NULL) {
  129. error("build_job_resources_cpu_array: cpus==NULL");
  130. return -1;
  131. }
  132. /* clear vestigial data and create new arrays of max size */
  133. job_resrcs_ptr->cpu_array_cnt = 0;
  134. xfree(job_resrcs_ptr->cpu_array_reps);
  135. job_resrcs_ptr->cpu_array_reps =
  136. xmalloc(job_resrcs_ptr->nhosts * sizeof(uint32_t));
  137. xfree(job_resrcs_ptr->cpu_array_value);
  138. job_resrcs_ptr->cpu_array_value =
  139. xmalloc(job_resrcs_ptr->nhosts * sizeof(uint16_t));
  140. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  141. if (job_resrcs_ptr->cpus[i] != last_cpu_cnt) {
  142. last_cpu_cnt = job_resrcs_ptr->cpus[i];
  143. job_resrcs_ptr->cpu_array_value[
  144. job_resrcs_ptr->cpu_array_cnt]
  145. = last_cpu_cnt;
  146. job_resrcs_ptr->cpu_array_reps[
  147. job_resrcs_ptr->cpu_array_cnt] = 1;
  148. job_resrcs_ptr->cpu_array_cnt++;
  149. } else {
  150. job_resrcs_ptr->cpu_array_reps[
  151. job_resrcs_ptr->cpu_array_cnt-1]++;
  152. }
  153. cpu_count += last_cpu_cnt;
  154. }
  155. return cpu_count;
  156. }
  157. /* Rebuild cpus array based upon the values of nhosts, cpu_array_value and
  158. * cpu_array_reps in an existing data structure
  159. * Return total CPU count or -1 on error */
  160. extern int build_job_resources_cpus_array(job_resources_t *job_resrcs_ptr)
  161. {
  162. int cpu_count = 0, cpu_inx, i, j;
  163. if (job_resrcs_ptr->nhosts == 0)
  164. return cpu_count; /* no work to do */
  165. if (job_resrcs_ptr->cpu_array_cnt == 0) {
  166. error("build_job_resources_cpus_array: cpu_array_cnt==0");
  167. return -1;
  168. }
  169. if (job_resrcs_ptr->cpu_array_value == NULL) {
  170. error("build_job_resources_cpus_array: cpu_array_value==NULL");
  171. return -1;
  172. }
  173. if (job_resrcs_ptr->cpu_array_reps == NULL) {
  174. error("build_job_resources_cpus_array: cpu_array_reps==NULL");
  175. return -1;
  176. }
  177. /* clear vestigial data and create new arrays of max size */
  178. xfree(job_resrcs_ptr->cpus);
  179. job_resrcs_ptr->cpus =
  180. xmalloc(job_resrcs_ptr->nhosts * sizeof(uint16_t));
  181. cpu_inx = 0;
  182. for (i=0; i<job_resrcs_ptr->cpu_array_cnt; i++) {
  183. for (j=0; j<job_resrcs_ptr->cpu_array_reps[i]; j++) {
  184. if (cpu_inx >= job_resrcs_ptr->nhosts) {
  185. error("build_job_resources_cpus_array: "
  186. "cpu_array is too long");
  187. return -1;
  188. }
  189. cpu_count += job_resrcs_ptr->cpus[i];
  190. job_resrcs_ptr->cpus[cpu_inx++] =
  191. job_resrcs_ptr->cpus[i];
  192. }
  193. }
  194. if (cpu_inx < job_resrcs_ptr->nhosts) {
  195. error("build_job_resources_cpus_array: "
  196. "cpu_array is incomplete");
  197. return -1;
  198. }
  199. return cpu_count;
  200. }
  201. /* Reset the node_bitmap in a job_resources data structure
  202. * This is needed after a restart/reconfiguration since nodes can
  203. * be added or removed from the system resulting in changing in
  204. * the bitmap size or bit positions */
  205. extern int reset_node_bitmap(job_resources_t *job_resrcs_ptr, uint32_t job_id)
  206. {
  207. int i;
  208. if (!job_resrcs_ptr)
  209. return SLURM_SUCCESS;
  210. if (job_resrcs_ptr->node_bitmap)
  211. FREE_NULL_BITMAP(job_resrcs_ptr->node_bitmap);
  212. if (job_resrcs_ptr->nodes &&
  213. (node_name2bitmap(job_resrcs_ptr->nodes, false,
  214. &job_resrcs_ptr->node_bitmap))) {
  215. error("Invalid nodes (%s) for job_id %u",
  216. job_resrcs_ptr->nodes, job_id);
  217. return SLURM_ERROR;
  218. } else if (job_resrcs_ptr->nodes == NULL) {
  219. job_resrcs_ptr->node_bitmap = bit_alloc(node_record_count);
  220. }
  221. i = bit_set_count(job_resrcs_ptr->node_bitmap);
  222. if (job_resrcs_ptr->nhosts != i) {
  223. error("Invalid change in resource allocation node count for "
  224. "job %u, %u to %d", job_id, job_resrcs_ptr->nhosts, i);
  225. return SLURM_ERROR;
  226. }
  227. return SLURM_SUCCESS;
  228. }
  229. extern int valid_job_resources(job_resources_t *job_resrcs,
  230. void *node_rec_table,
  231. uint16_t fast_schedule)
  232. {
  233. int i, bitmap_len;
  234. int sock_inx = 0, sock_cnt = 0;
  235. uint32_t cores, socks;
  236. struct node_record *node_ptr, *node_record_table;
  237. if (job_resrcs->node_bitmap == NULL) {
  238. error("valid_job_resources: node_bitmap is NULL");
  239. return SLURM_ERROR;
  240. }
  241. if ((job_resrcs->sockets_per_node == NULL) ||
  242. (job_resrcs->cores_per_socket == NULL) ||
  243. (job_resrcs->sock_core_rep_count == NULL)) {
  244. error("valid_job_resources: socket/core array is NULL");
  245. return SLURM_ERROR;
  246. }
  247. node_record_table = (struct node_record *) node_rec_table;
  248. bitmap_len = bit_size(job_resrcs->node_bitmap);
  249. for (i=0; i<bitmap_len; i++) {
  250. if (!bit_test(job_resrcs->node_bitmap, i))
  251. continue;
  252. node_ptr = node_record_table + i;
  253. if (fast_schedule) {
  254. socks = node_ptr->config_ptr->sockets;
  255. cores = node_ptr->config_ptr->cores;
  256. } else {
  257. socks = node_ptr->sockets;
  258. cores = node_ptr->cores;
  259. }
  260. if (sock_cnt >= job_resrcs->sock_core_rep_count[sock_inx]) {
  261. sock_inx++;
  262. sock_cnt = 0;
  263. }
  264. if ((socks != job_resrcs->sockets_per_node[sock_inx]) ||
  265. (cores != job_resrcs->cores_per_socket[sock_inx])) {
  266. error("valid_job_resources: "
  267. "%s sockets:%u,%u, cores %u,%u",
  268. node_ptr->name,
  269. socks,
  270. job_resrcs->sockets_per_node[sock_inx],
  271. cores,
  272. job_resrcs->cores_per_socket[sock_inx]);
  273. return SLURM_ERROR;
  274. }
  275. sock_cnt++;
  276. }
  277. return SLURM_SUCCESS;
  278. }
  279. extern job_resources_t *copy_job_resources(job_resources_t *job_resrcs_ptr)
  280. {
  281. int i, sock_inx = 0;
  282. job_resources_t *new_layout = xmalloc(sizeof(struct job_resources));
  283. xassert(job_resrcs_ptr);
  284. new_layout->nhosts = job_resrcs_ptr->nhosts;
  285. new_layout->ncpus = job_resrcs_ptr->ncpus;
  286. new_layout->node_req = job_resrcs_ptr->node_req;
  287. if (job_resrcs_ptr->core_bitmap) {
  288. new_layout->core_bitmap = bit_copy(job_resrcs_ptr->
  289. core_bitmap);
  290. }
  291. if (job_resrcs_ptr->core_bitmap_used) {
  292. new_layout->core_bitmap_used = bit_copy(job_resrcs_ptr->
  293. core_bitmap_used);
  294. }
  295. if (job_resrcs_ptr->node_bitmap) {
  296. new_layout->node_bitmap = bit_copy(job_resrcs_ptr->
  297. node_bitmap);
  298. }
  299. new_layout->cpu_array_cnt = job_resrcs_ptr->cpu_array_cnt;
  300. if (job_resrcs_ptr->cpu_array_reps &&
  301. job_resrcs_ptr->cpu_array_cnt) {
  302. new_layout->cpu_array_reps =
  303. xmalloc(sizeof(uint32_t) *
  304. job_resrcs_ptr->cpu_array_cnt);
  305. memcpy(new_layout->cpu_array_reps,
  306. job_resrcs_ptr->cpu_array_reps,
  307. (sizeof(uint32_t) * job_resrcs_ptr->cpu_array_cnt));
  308. }
  309. if (job_resrcs_ptr->cpu_array_value &&
  310. job_resrcs_ptr->cpu_array_cnt) {
  311. new_layout->cpu_array_value =
  312. xmalloc(sizeof(uint16_t) *
  313. job_resrcs_ptr->cpu_array_cnt);
  314. memcpy(new_layout->cpu_array_value,
  315. job_resrcs_ptr->cpu_array_value,
  316. (sizeof(uint16_t) * job_resrcs_ptr->cpu_array_cnt));
  317. }
  318. if (job_resrcs_ptr->cpus) {
  319. new_layout->cpus = xmalloc(sizeof(uint16_t) *
  320. job_resrcs_ptr->nhosts);
  321. memcpy(new_layout->cpus, job_resrcs_ptr->cpus,
  322. (sizeof(uint16_t) * job_resrcs_ptr->nhosts));
  323. }
  324. if (job_resrcs_ptr->cpus_used) {
  325. new_layout->cpus_used = xmalloc(sizeof(uint16_t) *
  326. job_resrcs_ptr->nhosts);
  327. memcpy(new_layout->cpus_used, job_resrcs_ptr->cpus_used,
  328. (sizeof(uint16_t) * job_resrcs_ptr->nhosts));
  329. }
  330. if (job_resrcs_ptr->memory_allocated) {
  331. new_layout->memory_allocated = xmalloc(sizeof(uint32_t) *
  332. new_layout->nhosts);
  333. memcpy(new_layout->memory_allocated,
  334. job_resrcs_ptr->memory_allocated,
  335. (sizeof(uint32_t) * job_resrcs_ptr->nhosts));
  336. }
  337. if (job_resrcs_ptr->memory_used) {
  338. new_layout->memory_used = xmalloc(sizeof(uint32_t) *
  339. new_layout->nhosts);
  340. memcpy(new_layout->memory_used,
  341. job_resrcs_ptr->memory_used,
  342. (sizeof(uint32_t) * job_resrcs_ptr->nhosts));
  343. }
  344. /* Copy sockets_per_node, cores_per_socket and core_sock_rep_count */
  345. new_layout->sockets_per_node = xmalloc(sizeof(uint16_t) *
  346. new_layout->nhosts);
  347. new_layout->cores_per_socket = xmalloc(sizeof(uint16_t) *
  348. new_layout->nhosts);
  349. new_layout->sock_core_rep_count = xmalloc(sizeof(uint32_t) *
  350. new_layout->nhosts);
  351. for (i=0; i<new_layout->nhosts; i++) {
  352. if (job_resrcs_ptr->sock_core_rep_count[i] == 0) {
  353. error("copy_job_resources: sock_core_rep_count=0");
  354. break;
  355. }
  356. sock_inx += job_resrcs_ptr->sock_core_rep_count[i];
  357. if (sock_inx >= job_resrcs_ptr->nhosts) {
  358. i++;
  359. break;
  360. }
  361. }
  362. memcpy(new_layout->sockets_per_node,
  363. job_resrcs_ptr->sockets_per_node, (sizeof(uint16_t) * i));
  364. memcpy(new_layout->cores_per_socket,
  365. job_resrcs_ptr->cores_per_socket, (sizeof(uint16_t) * i));
  366. memcpy(new_layout->sock_core_rep_count,
  367. job_resrcs_ptr->sock_core_rep_count,
  368. (sizeof(uint32_t) * i));
  369. return new_layout;
  370. }
  371. extern void free_job_resources(job_resources_t **job_resrcs_pptr)
  372. {
  373. job_resources_t *job_resrcs_ptr = *job_resrcs_pptr;
  374. if (job_resrcs_ptr) {
  375. FREE_NULL_BITMAP(job_resrcs_ptr->core_bitmap);
  376. FREE_NULL_BITMAP(job_resrcs_ptr->core_bitmap_used);
  377. xfree(job_resrcs_ptr->cores_per_socket);
  378. xfree(job_resrcs_ptr->cpu_array_reps);
  379. xfree(job_resrcs_ptr->cpu_array_value);
  380. xfree(job_resrcs_ptr->cpus);
  381. xfree(job_resrcs_ptr->cpus_used);
  382. xfree(job_resrcs_ptr->memory_allocated);
  383. xfree(job_resrcs_ptr->memory_used);
  384. FREE_NULL_BITMAP(job_resrcs_ptr->node_bitmap);
  385. xfree(job_resrcs_ptr->nodes);
  386. xfree(job_resrcs_ptr->sock_core_rep_count);
  387. xfree(job_resrcs_ptr->sockets_per_node);
  388. xfree(job_resrcs_ptr);
  389. *job_resrcs_pptr = NULL;
  390. }
  391. }
  392. /* Log the contents of a job_resources data structure using info() */
  393. extern void log_job_resources(uint32_t job_id,
  394. job_resources_t *job_resrcs_ptr)
  395. {
  396. int bit_inx = 0, bit_reps, i;
  397. int array_size, node_inx;
  398. int sock_inx = 0, sock_reps = 0;
  399. if (job_resrcs_ptr == NULL) {
  400. error("log_job_resources: job_resrcs_ptr is NULL");
  401. return;
  402. }
  403. info("====================");
  404. info("job_id:%u nhosts:%u ncpus:%u node_req:%u nodes=%s",
  405. job_id, job_resrcs_ptr->nhosts, job_resrcs_ptr->ncpus,
  406. job_resrcs_ptr->node_req, job_resrcs_ptr->nodes);
  407. if (job_resrcs_ptr->cpus == NULL) {
  408. error("log_job_resources: cpus array is NULL");
  409. return;
  410. }
  411. if (job_resrcs_ptr->memory_allocated == NULL) {
  412. error("log_job_resources: memory array is NULL");
  413. return;
  414. }
  415. if ((job_resrcs_ptr->cores_per_socket == NULL) ||
  416. (job_resrcs_ptr->sockets_per_node == NULL) ||
  417. (job_resrcs_ptr->sock_core_rep_count == NULL)) {
  418. error("log_job_resources: socket/core array is NULL");
  419. return;
  420. }
  421. if (job_resrcs_ptr->core_bitmap == NULL) {
  422. error("log_job_resources: core_bitmap is NULL");
  423. return;
  424. }
  425. if (job_resrcs_ptr->core_bitmap_used == NULL) {
  426. error("log_job_resources: core_bitmap_used is NULL");
  427. return;
  428. }
  429. array_size = bit_size(job_resrcs_ptr->core_bitmap);
  430. /* Can only log node_bitmap from slurmctld, so don't bother here */
  431. for (node_inx=0; node_inx<job_resrcs_ptr->nhosts; node_inx++) {
  432. uint32_t cpus_used = 0, memory_allocated = 0, memory_used = 0;
  433. info("Node[%d]:", node_inx);
  434. if (sock_reps >=
  435. job_resrcs_ptr->sock_core_rep_count[sock_inx]) {
  436. sock_inx++;
  437. sock_reps = 0;
  438. }
  439. sock_reps++;
  440. if (job_resrcs_ptr->cpus_used)
  441. cpus_used = job_resrcs_ptr->cpus_used[node_inx];
  442. if (job_resrcs_ptr->memory_used)
  443. memory_used = job_resrcs_ptr->memory_used[node_inx];
  444. if (job_resrcs_ptr->memory_allocated)
  445. memory_allocated = job_resrcs_ptr->
  446. memory_allocated[node_inx];
  447. info(" Mem(MB):%u:%u Sockets:%u Cores:%u CPUs:%u:%u",
  448. memory_allocated, memory_used,
  449. job_resrcs_ptr->sockets_per_node[sock_inx],
  450. job_resrcs_ptr->cores_per_socket[sock_inx],
  451. job_resrcs_ptr->cpus[node_inx],
  452. cpus_used);
  453. bit_reps = job_resrcs_ptr->sockets_per_node[sock_inx] *
  454. job_resrcs_ptr->cores_per_socket[sock_inx];
  455. for (i=0; i<bit_reps; i++) {
  456. if (bit_inx >= array_size) {
  457. error("log_job_resources: array size wrong");
  458. break;
  459. }
  460. if (bit_test(job_resrcs_ptr->core_bitmap,
  461. bit_inx)) {
  462. char *core_used = "";
  463. if (bit_test(job_resrcs_ptr->
  464. core_bitmap_used, bit_inx))
  465. core_used = " and in use";
  466. info(" Socket[%d] Core[%d] is allocated%s",
  467. (i / job_resrcs_ptr->
  468. cores_per_socket[sock_inx]),
  469. (i % job_resrcs_ptr->
  470. cores_per_socket[sock_inx]),
  471. core_used);
  472. }
  473. bit_inx++;
  474. }
  475. }
  476. for (node_inx=0; node_inx<job_resrcs_ptr->cpu_array_cnt;
  477. node_inx++) {
  478. if (node_inx == 0)
  479. info("--------------------");
  480. info("cpu_array_value[%d]:%u reps:%u", node_inx,
  481. job_resrcs_ptr->cpu_array_value[node_inx],
  482. job_resrcs_ptr->cpu_array_reps[node_inx]);
  483. }
  484. info("====================");
  485. }
  486. extern void pack_job_resources(job_resources_t *job_resrcs_ptr, Buf buffer,
  487. uint16_t protocol_version)
  488. {
  489. uint32_t cluster_flags = slurmdb_setup_cluster_flags();
  490. if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
  491. if (job_resrcs_ptr == NULL) {
  492. uint32_t empty = NO_VAL;
  493. pack32(empty, buffer);
  494. return;
  495. }
  496. pack32(job_resrcs_ptr->nhosts, buffer);
  497. pack32(job_resrcs_ptr->ncpus, buffer);
  498. pack32(job_resrcs_ptr->node_req, buffer);
  499. packstr(job_resrcs_ptr->nodes, buffer);
  500. if (job_resrcs_ptr->cpu_array_reps)
  501. pack32_array(job_resrcs_ptr->cpu_array_reps,
  502. job_resrcs_ptr->cpu_array_cnt, buffer);
  503. else
  504. pack32_array(job_resrcs_ptr->cpu_array_reps, 0, buffer);
  505. if (job_resrcs_ptr->cpu_array_value)
  506. pack16_array(job_resrcs_ptr->cpu_array_value,
  507. job_resrcs_ptr->cpu_array_cnt, buffer);
  508. else
  509. pack16_array(job_resrcs_ptr->cpu_array_value,
  510. 0, buffer);
  511. if (job_resrcs_ptr->cpus)
  512. pack16_array(job_resrcs_ptr->cpus,
  513. job_resrcs_ptr->nhosts, buffer);
  514. else
  515. pack16_array(job_resrcs_ptr->cpus, 0, buffer);
  516. if (job_resrcs_ptr->cpus_used)
  517. pack16_array(job_resrcs_ptr->cpus_used,
  518. job_resrcs_ptr->nhosts, buffer);
  519. else
  520. pack16_array(job_resrcs_ptr->cpus_used, 0, buffer);
  521. if (job_resrcs_ptr->memory_allocated)
  522. pack32_array(job_resrcs_ptr->memory_allocated,
  523. job_resrcs_ptr->nhosts, buffer);
  524. else
  525. pack32_array(job_resrcs_ptr->memory_allocated,
  526. 0, buffer);
  527. if (job_resrcs_ptr->memory_used)
  528. pack32_array(job_resrcs_ptr->memory_used,
  529. job_resrcs_ptr->nhosts, buffer);
  530. else
  531. pack32_array(job_resrcs_ptr->memory_used, 0, buffer);
  532. if (!(cluster_flags & CLUSTER_FLAG_BG)) {
  533. int i;
  534. uint32_t core_cnt = 0, sock_recs = 0;
  535. xassert(job_resrcs_ptr->cores_per_socket);
  536. xassert(job_resrcs_ptr->sock_core_rep_count);
  537. xassert(job_resrcs_ptr->sockets_per_node);
  538. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  539. core_cnt += job_resrcs_ptr->sockets_per_node[i]
  540. * job_resrcs_ptr->cores_per_socket[i] *
  541. job_resrcs_ptr->sock_core_rep_count[i];
  542. sock_recs += job_resrcs_ptr->
  543. sock_core_rep_count[i];
  544. if (sock_recs >= job_resrcs_ptr->nhosts)
  545. break;
  546. }
  547. i++;
  548. pack16_array(job_resrcs_ptr->sockets_per_node,
  549. (uint32_t) i, buffer);
  550. pack16_array(job_resrcs_ptr->cores_per_socket,
  551. (uint32_t) i, buffer);
  552. pack32_array(job_resrcs_ptr->sock_core_rep_count,
  553. (uint32_t) i, buffer);
  554. xassert(job_resrcs_ptr->core_bitmap);
  555. xassert(job_resrcs_ptr->core_bitmap_used);
  556. pack_bit_str(job_resrcs_ptr->core_bitmap, buffer);
  557. pack_bit_str(job_resrcs_ptr->core_bitmap_used, buffer);
  558. }
  559. } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  560. uint8_t tmp_8;
  561. if (job_resrcs_ptr == NULL) {
  562. uint32_t empty = NO_VAL;
  563. pack32(empty, buffer);
  564. return;
  565. }
  566. pack32(job_resrcs_ptr->nhosts, buffer);
  567. pack32(job_resrcs_ptr->ncpus, buffer);
  568. tmp_8 = job_resrcs_ptr->node_req; /* 32-bit in v2.5 */
  569. pack8(tmp_8, buffer);
  570. packstr(job_resrcs_ptr->nodes, buffer);
  571. if (job_resrcs_ptr->cpu_array_reps)
  572. pack32_array(job_resrcs_ptr->cpu_array_reps,
  573. job_resrcs_ptr->cpu_array_cnt, buffer);
  574. else
  575. pack32_array(job_resrcs_ptr->cpu_array_reps, 0, buffer);
  576. if (job_resrcs_ptr->cpu_array_value)
  577. pack16_array(job_resrcs_ptr->cpu_array_value,
  578. job_resrcs_ptr->cpu_array_cnt, buffer);
  579. else
  580. pack16_array(job_resrcs_ptr->cpu_array_value,
  581. 0, buffer);
  582. if (job_resrcs_ptr->cpus)
  583. pack16_array(job_resrcs_ptr->cpus,
  584. job_resrcs_ptr->nhosts, buffer);
  585. else
  586. pack16_array(job_resrcs_ptr->cpus, 0, buffer);
  587. if (job_resrcs_ptr->cpus_used)
  588. pack16_array(job_resrcs_ptr->cpus_used,
  589. job_resrcs_ptr->nhosts, buffer);
  590. else
  591. pack16_array(job_resrcs_ptr->cpus_used, 0, buffer);
  592. if (job_resrcs_ptr->memory_allocated)
  593. pack32_array(job_resrcs_ptr->memory_allocated,
  594. job_resrcs_ptr->nhosts, buffer);
  595. else
  596. pack32_array(job_resrcs_ptr->memory_allocated,
  597. 0, buffer);
  598. if (job_resrcs_ptr->memory_used)
  599. pack32_array(job_resrcs_ptr->memory_used,
  600. job_resrcs_ptr->nhosts, buffer);
  601. else
  602. pack32_array(job_resrcs_ptr->memory_used, 0, buffer);
  603. if (!(cluster_flags & CLUSTER_FLAG_BG)) {
  604. int i;
  605. uint32_t core_cnt = 0, sock_recs = 0;
  606. xassert(job_resrcs_ptr->cores_per_socket);
  607. xassert(job_resrcs_ptr->sock_core_rep_count);
  608. xassert(job_resrcs_ptr->sockets_per_node);
  609. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  610. core_cnt += job_resrcs_ptr->sockets_per_node[i]
  611. * job_resrcs_ptr->cores_per_socket[i] *
  612. job_resrcs_ptr->sock_core_rep_count[i];
  613. sock_recs += job_resrcs_ptr->
  614. sock_core_rep_count[i];
  615. if (sock_recs >= job_resrcs_ptr->nhosts)
  616. break;
  617. }
  618. i++;
  619. pack16_array(job_resrcs_ptr->sockets_per_node,
  620. (uint32_t) i, buffer);
  621. pack16_array(job_resrcs_ptr->cores_per_socket,
  622. (uint32_t) i, buffer);
  623. pack32_array(job_resrcs_ptr->sock_core_rep_count,
  624. (uint32_t) i, buffer);
  625. xassert(job_resrcs_ptr->core_bitmap);
  626. xassert(job_resrcs_ptr->core_bitmap_used);
  627. pack_bit_str(job_resrcs_ptr->core_bitmap, buffer);
  628. pack_bit_str(job_resrcs_ptr->core_bitmap_used, buffer);
  629. }
  630. } else {
  631. error("pack_job_resources: protocol_version %hu not supported",
  632. protocol_version);
  633. }
  634. }
  635. extern int unpack_job_resources(job_resources_t **job_resrcs_pptr,
  636. Buf buffer, uint16_t protocol_version)
  637. {
  638. char *bit_fmt = NULL;
  639. uint32_t empty, tmp32;
  640. job_resources_t *job_resrcs;
  641. uint32_t cluster_flags = slurmdb_setup_cluster_flags();
  642. xassert(job_resrcs_pptr);
  643. if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
  644. safe_unpack32(&empty, buffer);
  645. if (empty == NO_VAL) {
  646. *job_resrcs_pptr = NULL;
  647. return SLURM_SUCCESS;
  648. }
  649. job_resrcs = xmalloc(sizeof(struct job_resources));
  650. job_resrcs->nhosts = empty;
  651. safe_unpack32(&job_resrcs->ncpus, buffer);
  652. safe_unpack32(&job_resrcs->node_req, buffer);
  653. safe_unpackstr_xmalloc(&job_resrcs->nodes, &tmp32, buffer);
  654. safe_unpack32_array(&job_resrcs->cpu_array_reps,
  655. &tmp32, buffer);
  656. if (tmp32 == 0)
  657. xfree(job_resrcs->cpu_array_reps);
  658. job_resrcs->cpu_array_cnt = tmp32;
  659. safe_unpack16_array(&job_resrcs->cpu_array_value,
  660. &tmp32, buffer);
  661. if (tmp32 == 0)
  662. xfree(job_resrcs->cpu_array_value);
  663. if (tmp32 != job_resrcs->cpu_array_cnt)
  664. goto unpack_error;
  665. safe_unpack16_array(&job_resrcs->cpus, &tmp32, buffer);
  666. if (tmp32 == 0)
  667. xfree(job_resrcs->cpus);
  668. if (tmp32 != job_resrcs->nhosts)
  669. goto unpack_error;
  670. safe_unpack16_array(&job_resrcs->cpus_used, &tmp32, buffer);
  671. if (tmp32 == 0)
  672. xfree(job_resrcs->cpus_used);
  673. safe_unpack32_array(&job_resrcs->memory_allocated,
  674. &tmp32, buffer);
  675. if (tmp32 == 0)
  676. xfree(job_resrcs->memory_allocated);
  677. safe_unpack32_array(&job_resrcs->memory_used, &tmp32, buffer);
  678. if (tmp32 == 0)
  679. xfree(job_resrcs->memory_used);
  680. if (!(cluster_flags & CLUSTER_FLAG_BG)) {
  681. safe_unpack16_array(&job_resrcs->sockets_per_node,
  682. &tmp32, buffer);
  683. if (tmp32 == 0)
  684. xfree(job_resrcs->sockets_per_node);
  685. safe_unpack16_array(&job_resrcs->cores_per_socket,
  686. &tmp32, buffer);
  687. if (tmp32 == 0)
  688. xfree(job_resrcs->cores_per_socket);
  689. safe_unpack32_array(&job_resrcs->sock_core_rep_count,
  690. &tmp32, buffer);
  691. if (tmp32 == 0)
  692. xfree(job_resrcs->sock_core_rep_count);
  693. unpack_bit_str(&job_resrcs->core_bitmap, buffer);
  694. unpack_bit_str(&job_resrcs->core_bitmap_used, buffer);
  695. }
  696. } else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  697. uint8_t tmp_8;
  698. safe_unpack32(&empty, buffer);
  699. if (empty == NO_VAL) {
  700. *job_resrcs_pptr = NULL;
  701. return SLURM_SUCCESS;
  702. }
  703. job_resrcs = xmalloc(sizeof(struct job_resources));
  704. job_resrcs->nhosts = empty;
  705. safe_unpack32(&job_resrcs->ncpus, buffer);
  706. safe_unpack8(&tmp_8, buffer);
  707. if (tmp_8 < 100) /* Not NODE_CR_RESERVED */
  708. job_resrcs->node_req = tmp_8; /* 32-bit in v2.5 */
  709. else
  710. job_resrcs->node_req = NODE_CR_RESERVED;
  711. safe_unpackstr_xmalloc(&job_resrcs->nodes, &tmp32, buffer);
  712. safe_unpack32_array(&job_resrcs->cpu_array_reps,
  713. &tmp32, buffer);
  714. if (tmp32 == 0)
  715. xfree(job_resrcs->cpu_array_reps);
  716. job_resrcs->cpu_array_cnt = tmp32;
  717. safe_unpack16_array(&job_resrcs->cpu_array_value,
  718. &tmp32, buffer);
  719. if (tmp32 == 0)
  720. xfree(job_resrcs->cpu_array_value);
  721. if (tmp32 != job_resrcs->cpu_array_cnt)
  722. goto unpack_error;
  723. safe_unpack16_array(&job_resrcs->cpus, &tmp32, buffer);
  724. if (tmp32 == 0)
  725. xfree(job_resrcs->cpus);
  726. if (tmp32 != job_resrcs->nhosts)
  727. goto unpack_error;
  728. safe_unpack16_array(&job_resrcs->cpus_used, &tmp32, buffer);
  729. if (tmp32 == 0)
  730. xfree(job_resrcs->cpus_used);
  731. safe_unpack32_array(&job_resrcs->memory_allocated,
  732. &tmp32, buffer);
  733. if (tmp32 == 0)
  734. xfree(job_resrcs->memory_allocated);
  735. safe_unpack32_array(&job_resrcs->memory_used, &tmp32, buffer);
  736. if (tmp32 == 0)
  737. xfree(job_resrcs->memory_used);
  738. if (!(cluster_flags & CLUSTER_FLAG_BG)) {
  739. safe_unpack16_array(&job_resrcs->sockets_per_node,
  740. &tmp32, buffer);
  741. if (tmp32 == 0)
  742. xfree(job_resrcs->sockets_per_node);
  743. safe_unpack16_array(&job_resrcs->cores_per_socket,
  744. &tmp32, buffer);
  745. if (tmp32 == 0)
  746. xfree(job_resrcs->cores_per_socket);
  747. safe_unpack32_array(&job_resrcs->sock_core_rep_count,
  748. &tmp32, buffer);
  749. if (tmp32 == 0)
  750. xfree(job_resrcs->sock_core_rep_count);
  751. unpack_bit_str(&job_resrcs->core_bitmap, buffer);
  752. unpack_bit_str(&job_resrcs->core_bitmap_used, buffer);
  753. }
  754. } else {
  755. error("unpack_job_resources: protocol_version %hu not "
  756. "supported", protocol_version);
  757. goto unpack_error;
  758. }
  759. *job_resrcs_pptr = job_resrcs;
  760. return SLURM_SUCCESS;
  761. unpack_error:
  762. error("unpack_job_resources: unpack error");
  763. free_job_resources(&job_resrcs);
  764. xfree(bit_fmt);
  765. *job_resrcs_pptr = NULL;
  766. return SLURM_ERROR;
  767. }
  768. extern int get_job_resources_offset(job_resources_t *job_resrcs_ptr,
  769. uint32_t node_id, uint16_t socket_id,
  770. uint16_t core_id)
  771. {
  772. int i, bit_inx = 0;
  773. xassert(job_resrcs_ptr);
  774. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  775. if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
  776. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  777. job_resrcs_ptr->cores_per_socket[i] *
  778. job_resrcs_ptr->sock_core_rep_count[i];
  779. node_id -= job_resrcs_ptr->sock_core_rep_count[i];
  780. } else if (socket_id >= job_resrcs_ptr->
  781. sockets_per_node[i]) {
  782. error("get_job_resrcs_bit: socket_id >= socket_cnt "
  783. "(%u >= %u)", socket_id,
  784. job_resrcs_ptr->sockets_per_node[i]);
  785. return -1;
  786. } else if (core_id >= job_resrcs_ptr->cores_per_socket[i]) {
  787. error("get_job_resrcs_bit: core_id >= core_cnt "
  788. "(%u >= %u)", core_id,
  789. job_resrcs_ptr->cores_per_socket[i]);
  790. return -1;
  791. } else {
  792. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  793. job_resrcs_ptr->cores_per_socket[i] *
  794. node_id;
  795. bit_inx += job_resrcs_ptr->cores_per_socket[i] *
  796. socket_id;
  797. bit_inx += core_id;
  798. break;
  799. }
  800. }
  801. i = bit_size(job_resrcs_ptr->core_bitmap);
  802. if (bit_inx >= i) {
  803. error("get_job_resources_bit: offset >= bitmap size "
  804. "(%d >= %d)", bit_inx, i);
  805. return -1;
  806. }
  807. return bit_inx;
  808. }
  809. extern int get_job_resources_bit(job_resources_t *job_resrcs_ptr,
  810. uint32_t node_id, uint16_t socket_id,
  811. uint16_t core_id)
  812. {
  813. int bit_inx = get_job_resources_offset(job_resrcs_ptr, node_id,
  814. socket_id, core_id);
  815. if (bit_inx < 0)
  816. return SLURM_ERROR;
  817. return bit_test(job_resrcs_ptr->core_bitmap, bit_inx);
  818. }
  819. extern int set_job_resources_bit(job_resources_t *job_resrcs_ptr,
  820. uint32_t node_id, uint16_t socket_id,
  821. uint16_t core_id)
  822. {
  823. int bit_inx = get_job_resources_offset(job_resrcs_ptr, node_id,
  824. socket_id, core_id);
  825. if (bit_inx < 0)
  826. return SLURM_ERROR;
  827. bit_set(job_resrcs_ptr->core_bitmap, bit_inx);
  828. return SLURM_SUCCESS;
  829. }
  830. /* For every core bitmap and core_bitmap_used set in the "from" resources
  831. * structure at from_node_offset, set the corresponding bit in the "new"
  832. * resources structure at new_node_offset */
  833. extern int job_resources_bits_copy(job_resources_t *new_job_resrcs_ptr,
  834. uint16_t new_node_offset,
  835. job_resources_t *from_job_resrcs_ptr,
  836. uint16_t from_node_offset)
  837. {
  838. int i, rc = SLURM_SUCCESS;
  839. int new_bit_inx = 0, new_core_cnt = 0;
  840. int from_bit_inx = 0, from_core_cnt = 0;
  841. xassert(new_job_resrcs_ptr);
  842. xassert(from_job_resrcs_ptr);
  843. if (new_node_offset >= new_job_resrcs_ptr->nhosts) {
  844. error("job_resources_bits_move: new_node_offset invalid "
  845. "(%u is 0 or >=%u)", new_node_offset,
  846. new_job_resrcs_ptr->nhosts);
  847. return SLURM_ERROR;
  848. }
  849. for (i = 0; i < new_job_resrcs_ptr->nhosts; i++) {
  850. if (new_job_resrcs_ptr->sock_core_rep_count[i] <=
  851. new_node_offset) {
  852. new_bit_inx += new_job_resrcs_ptr->sockets_per_node[i] *
  853. new_job_resrcs_ptr->cores_per_socket[i] *
  854. new_job_resrcs_ptr->sock_core_rep_count[i];
  855. new_node_offset -= new_job_resrcs_ptr->
  856. sock_core_rep_count[i];
  857. } else {
  858. new_bit_inx += new_job_resrcs_ptr->sockets_per_node[i] *
  859. new_job_resrcs_ptr->cores_per_socket[i] *
  860. new_node_offset;
  861. new_core_cnt = new_job_resrcs_ptr->sockets_per_node[i] *
  862. new_job_resrcs_ptr->cores_per_socket[i];
  863. break;
  864. }
  865. }
  866. if (from_node_offset >= from_job_resrcs_ptr->nhosts) {
  867. error("job_resources_bits_move: from_node_offset invalid "
  868. "(%u is 0 or >=%u)", from_node_offset,
  869. from_job_resrcs_ptr->nhosts);
  870. return SLURM_ERROR;
  871. }
  872. for (i = 0; i < from_job_resrcs_ptr->nhosts; i++) {
  873. if (from_job_resrcs_ptr->sock_core_rep_count[i] <=
  874. from_node_offset) {
  875. from_bit_inx += from_job_resrcs_ptr->sockets_per_node[i] *
  876. from_job_resrcs_ptr->cores_per_socket[i] *
  877. from_job_resrcs_ptr->sock_core_rep_count[i];
  878. from_node_offset -= from_job_resrcs_ptr->
  879. sock_core_rep_count[i];
  880. } else {
  881. from_bit_inx += from_job_resrcs_ptr->sockets_per_node[i] *
  882. from_job_resrcs_ptr->cores_per_socket[i] *
  883. from_node_offset;
  884. from_core_cnt = from_job_resrcs_ptr->sockets_per_node[i] *
  885. from_job_resrcs_ptr->cores_per_socket[i];
  886. break;
  887. }
  888. }
  889. if (new_core_cnt != from_core_cnt) {
  890. error("job_resources_bits_move: core_cnt mis-match (%d != %d)",
  891. new_core_cnt, from_core_cnt);
  892. new_core_cnt = MIN(new_core_cnt, from_core_cnt);
  893. rc = SLURM_ERROR;
  894. }
  895. for (i = 0; i < new_core_cnt; i++) {
  896. if (bit_test(from_job_resrcs_ptr->core_bitmap, from_bit_inx+i))
  897. bit_set(new_job_resrcs_ptr->core_bitmap,new_bit_inx+i);
  898. if (bit_test(from_job_resrcs_ptr->core_bitmap_used,
  899. from_bit_inx+i)) {
  900. bit_set(new_job_resrcs_ptr->core_bitmap_used,
  901. new_bit_inx+i);
  902. }
  903. }
  904. return rc;
  905. }
  906. extern int get_job_resources_node(job_resources_t *job_resrcs_ptr,
  907. uint32_t node_id)
  908. {
  909. int i, bit_inx = 0, core_cnt = 0;
  910. xassert(job_resrcs_ptr);
  911. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  912. if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
  913. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  914. job_resrcs_ptr->cores_per_socket[i] *
  915. job_resrcs_ptr->sock_core_rep_count[i];
  916. node_id -= job_resrcs_ptr->sock_core_rep_count[i];
  917. } else {
  918. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  919. job_resrcs_ptr->cores_per_socket[i] *
  920. node_id;
  921. core_cnt = job_resrcs_ptr->sockets_per_node[i] *
  922. job_resrcs_ptr->cores_per_socket[i];
  923. break;
  924. }
  925. }
  926. if (core_cnt < 1) {
  927. error("get_job_resources_node: core_cnt=0");
  928. return 0;
  929. }
  930. i = bit_size(job_resrcs_ptr->core_bitmap);
  931. if ((bit_inx + core_cnt) > i) {
  932. error("get_job_resources_node: offset > bitmap size "
  933. "(%d >= %d)", (bit_inx + core_cnt), i);
  934. return 0;
  935. }
  936. for (i=0; i<core_cnt; i++) {
  937. if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
  938. return 1;
  939. }
  940. return 0;
  941. }
  942. static int _change_job_resources_node(job_resources_t *job_resrcs_ptr,
  943. uint32_t node_id, bool new_value)
  944. {
  945. int i, bit_inx = 0, core_cnt = 0;
  946. xassert(job_resrcs_ptr);
  947. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  948. if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
  949. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  950. job_resrcs_ptr->cores_per_socket[i] *
  951. job_resrcs_ptr->sock_core_rep_count[i];
  952. node_id -= job_resrcs_ptr->sock_core_rep_count[i];
  953. } else {
  954. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  955. job_resrcs_ptr->cores_per_socket[i] *
  956. node_id;
  957. core_cnt = job_resrcs_ptr->sockets_per_node[i] *
  958. job_resrcs_ptr->cores_per_socket[i];
  959. break;
  960. }
  961. }
  962. if (core_cnt < 1) {
  963. error("_change_job_resources_node: core_cnt=0");
  964. return SLURM_ERROR;
  965. }
  966. i = bit_size(job_resrcs_ptr->core_bitmap);
  967. if ((bit_inx + core_cnt) > i) {
  968. error("_change_job_resources_node: offset > bitmap size "
  969. "(%d >= %d)", (bit_inx + core_cnt), i);
  970. return SLURM_ERROR;
  971. }
  972. for (i=0; i<core_cnt; i++) {
  973. if (new_value)
  974. bit_set(job_resrcs_ptr->core_bitmap, bit_inx++);
  975. else
  976. bit_clear(job_resrcs_ptr->core_bitmap, bit_inx++);
  977. }
  978. return SLURM_SUCCESS;
  979. }
  980. extern int set_job_resources_node(job_resources_t *job_resrcs_ptr,
  981. uint32_t node_id)
  982. {
  983. return _change_job_resources_node(job_resrcs_ptr, node_id, true);
  984. }
  985. extern int clear_job_resources_node(job_resources_t *job_resrcs_ptr,
  986. uint32_t node_id)
  987. {
  988. return _change_job_resources_node(job_resrcs_ptr, node_id, false);
  989. }
  990. /* Return the count of core bitmaps set for the specific node */
  991. extern int count_job_resources_node(job_resources_t *job_resrcs_ptr,
  992. uint32_t node_id)
  993. {
  994. int i, bit_inx = 0, core_cnt = 0;
  995. int set_cnt = 0;
  996. xassert(job_resrcs_ptr);
  997. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  998. if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
  999. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  1000. job_resrcs_ptr->cores_per_socket[i] *
  1001. job_resrcs_ptr->sock_core_rep_count[i];
  1002. node_id -= job_resrcs_ptr->sock_core_rep_count[i];
  1003. } else {
  1004. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  1005. job_resrcs_ptr->cores_per_socket[i] *
  1006. node_id;
  1007. core_cnt = job_resrcs_ptr->sockets_per_node[i] *
  1008. job_resrcs_ptr->cores_per_socket[i];
  1009. break;
  1010. }
  1011. }
  1012. if (core_cnt < 1) {
  1013. error("count_job_resources_node: core_cnt=0");
  1014. return set_cnt;
  1015. }
  1016. i = bit_size(job_resrcs_ptr->core_bitmap);
  1017. if ((bit_inx + core_cnt) > i) {
  1018. error("count_job_resources_node: offset > bitmap size "
  1019. "(%d >= %d)", (bit_inx + core_cnt), i);
  1020. return set_cnt;
  1021. }
  1022. for (i=0; i<core_cnt; i++) {
  1023. if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
  1024. set_cnt++;
  1025. }
  1026. return set_cnt;
  1027. }
  1028. /* Return a copy of core_bitmap only for the specific node */
  1029. extern bitstr_t * copy_job_resources_node(job_resources_t *job_resrcs_ptr,
  1030. uint32_t node_id)
  1031. {
  1032. int i, bit_inx = 0, core_cnt = 0;
  1033. bitstr_t *core_bitmap;
  1034. xassert(job_resrcs_ptr);
  1035. for (i = 0; i < job_resrcs_ptr->nhosts; i++) {
  1036. if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
  1037. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  1038. job_resrcs_ptr->cores_per_socket[i] *
  1039. job_resrcs_ptr->sock_core_rep_count[i];
  1040. node_id -= job_resrcs_ptr->sock_core_rep_count[i];
  1041. } else {
  1042. bit_inx += job_resrcs_ptr->sockets_per_node[i] *
  1043. job_resrcs_ptr->cores_per_socket[i] *
  1044. node_id;
  1045. core_cnt = job_resrcs_ptr->sockets_per_node[i] *
  1046. job_resrcs_ptr->cores_per_socket[i];
  1047. break;
  1048. }
  1049. }
  1050. if (core_cnt < 1) {
  1051. error("copy_job_resources_node: core_cnt=0");
  1052. return NULL;
  1053. }
  1054. i = bit_size(job_resrcs_ptr->core_bitmap);
  1055. if ((bit_inx + core_cnt) > i) {
  1056. error("copy_job_resources_node: offset > bitmap size "
  1057. "(%d >= %d)", (bit_inx + core_cnt), i);
  1058. return NULL;
  1059. }
  1060. core_bitmap = bit_alloc(core_cnt);
  1061. if (!core_bitmap)
  1062. fatal("copy_job_resources_node: bit_alloc(%d): %m", core_cnt);
  1063. for (i = 0; i < core_cnt; i++) {
  1064. if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
  1065. bit_set(core_bitmap, i);
  1066. }
  1067. return core_bitmap;
  1068. }
  1069. extern int get_job_resources_cnt(job_resources_t *job_resrcs_ptr,
  1070. uint32_t node_id, uint16_t *socket_cnt,
  1071. uint16_t *cores_per_socket_cnt)
  1072. {
  1073. int i, node_inx = -1;
  1074. xassert(socket_cnt);
  1075. xassert(cores_per_socket_cnt);
  1076. xassert(job_resrcs_ptr->cores_per_socket);
  1077. xassert(job_resrcs_ptr->sock_core_rep_count);
  1078. xassert(job_resrcs_ptr->sockets_per_node);
  1079. for (i=0; i<job_resrcs_ptr->nhosts; i++) {
  1080. node_inx += job_resrcs_ptr->sock_core_rep_count[i];
  1081. if (node_id <= node_inx) {
  1082. *cores_per_socket_cnt = job_resrcs_ptr->
  1083. cores_per_socket[i];
  1084. *socket_cnt = job_resrcs_ptr->sockets_per_node[i];
  1085. return SLURM_SUCCESS;
  1086. }
  1087. }
  1088. error("get_job_resources_cnt: invalid node_id: %u", node_id);
  1089. *cores_per_socket_cnt = 0;
  1090. *socket_cnt = 0;
  1091. return SLURM_ERROR;
  1092. }
  1093. /*
  1094. * Test if job can fit into the given full-length core_bitmap
  1095. * IN job_resrcs_ptr - resources allocated to a job
  1096. * IN full_bitmap - bitmap of available CPUs
  1097. * IN bits_per_node - bits per node in the full_bitmap
  1098. * RET 1 on success, 0 otherwise
  1099. */
  1100. extern int job_fits_into_cores(job_resources_t *job_resrcs_ptr,
  1101. bitstr_t *full_bitmap,
  1102. const uint16_t *bits_per_node)
  1103. {
  1104. int full_node_inx = 0, full_bit_inx = 0, job_bit_inx = 0, i;
  1105. if (!full_bitmap)
  1106. return 1;
  1107. for (full_node_inx = 0; full_node_inx < node_record_count;
  1108. full_node_inx++) {
  1109. if (bit_test(job_resrcs_ptr->node_bitmap, full_node_inx)) {
  1110. for (i = 0; i < bits_per_node[full_node_inx]; i++) {
  1111. if (bit_test(full_bitmap, full_bit_inx + i) &&
  1112. bit_test(job_resrcs_ptr->core_bitmap,
  1113. job_bit_inx + i)) {
  1114. return 0;
  1115. }
  1116. }
  1117. job_bit_inx += bits_per_node[full_node_inx];
  1118. }
  1119. full_bit_inx += bits_per_node[full_node_inx];
  1120. }
  1121. return 1;
  1122. }
  1123. /*
  1124. * Add job to full-length core_bitmap
  1125. * IN job_resrcs_ptr - resources allocated to a job
  1126. * IN/OUT full_bitmap - bitmap of available CPUs, allocate as needed
  1127. * IN bits_per_node - bits per node in the full_bitmap
  1128. * RET 1 on success, 0 otherwise
  1129. */
  1130. extern void add_job_to_cores(job_resources_t *job_resrcs_ptr,
  1131. bitstr_t **full_core_bitmap,
  1132. const uint16_t *bits_per_node)
  1133. {
  1134. int full_node_inx = 0;
  1135. int job_bit_inx = 0, full_bit_inx = 0, i;
  1136. if (!job_resrcs_ptr->core_bitmap)
  1137. return;
  1138. /* add the job to the row_bitmap */
  1139. if (*full_core_bitmap == NULL) {
  1140. uint32_t size = 0;
  1141. for (i = 0; i < node_record_count; i++)
  1142. size += bits_per_node[i];
  1143. *full_core_bitmap = bit_alloc(size);
  1144. if (!*full_core_bitmap)
  1145. fatal("add_job_to_cores: bitmap memory error");
  1146. }
  1147. for (full_node_inx = 0; full_node_inx < node_record_count;
  1148. full_node_inx++) {
  1149. if (bit_test(job_resrcs_ptr->node_bitmap, full_node_inx)) {
  1150. for (i = 0; i < bits_per_node[full_node_inx]; i++) {
  1151. if (!bit_test(job_resrcs_ptr->core_bitmap,
  1152. job_bit_inx + i))
  1153. continue;
  1154. bit_set(*full_core_bitmap, full_bit_inx + i);
  1155. }
  1156. job_bit_inx += bits_per_node[full_node_inx];
  1157. }
  1158. full_bit_inx += bits_per_node[full_node_inx];
  1159. }
  1160. }
  1161. /*
  1162. * Remove job from full-length core_bitmap
  1163. * IN job_resrcs_ptr - resources allocated to a job
  1164. * IN/OUT full_bitmap - bitmap of available CPUs, allocate as needed
  1165. * IN bits_per_node - bits per node in the full_bitmap
  1166. * RET 1 on success, 0 otherwise
  1167. */
  1168. extern void remove_job_from_cores(job_resources_t *job_resrcs_ptr,
  1169. bitstr_t **full_core_bitmap,
  1170. const uint16_t *bits_per_node)
  1171. {
  1172. int full_node_inx = 0;
  1173. int job_bit_inx = 0, full_bit_inx = 0, i;
  1174. if (!job_resrcs_ptr->core_bitmap)
  1175. return;
  1176. /* add the job to the row_bitmap */
  1177. if (*full_core_bitmap == NULL) {
  1178. uint32_t size = 0;
  1179. for (i = 0; i < node_record_count; i++)
  1180. size += bits_per_node[i];
  1181. *full_core_bitmap = bit_alloc(size);
  1182. if (!*full_core_bitmap)
  1183. fatal("add_job_to_cores: bitmap memory error");
  1184. }
  1185. for (full_node_inx = 0; full_node_inx < node_record_count;
  1186. full_node_inx++) {
  1187. if (bit_test(job_resrcs_ptr->node_bitmap, full_node_inx)) {
  1188. for (i = 0; i < bits_per_node[full_node_inx]; i++) {
  1189. if (!bit_test(job_resrcs_ptr->core_bitmap,
  1190. job_bit_inx + i))
  1191. continue;
  1192. bit_clear(*full_core_bitmap, full_bit_inx + i);
  1193. }
  1194. job_bit_inx += bits_per_node[full_node_inx];
  1195. }
  1196. full_bit_inx += bits_per_node[full_node_inx];
  1197. }
  1198. }
  1199. /* Given a job pointer and a global node index, return the index of that
  1200. * node in the job_resrcs_ptr->cpus. Return -1 if invalid */
  1201. extern int job_resources_node_inx_to_cpu_inx(job_resources_t *job_resrcs_ptr,
  1202. int node_inx)
  1203. {
  1204. int first_inx, i, node_offset;
  1205. /* Test for error cases */
  1206. if (!job_resrcs_ptr || !job_resrcs_ptr->node_bitmap) {
  1207. error("job_resources_node_inx_to_cpu_inx: "
  1208. "no job_resrcs or node_bitmap");
  1209. return -1;
  1210. }
  1211. if (!bit_test(job_resrcs_ptr->node_bitmap, node_inx)) {
  1212. error("job_resources_node_inx_to_cpu_inx: "
  1213. "Invalid node_inx");
  1214. return -1;
  1215. }
  1216. if (job_resrcs_ptr->cpu_array_cnt == 0) {
  1217. error("job_resources_node_inx_to_cpu_inx: "
  1218. "Invalid cpu_array_cnt");
  1219. return -1;
  1220. }
  1221. /* Only one record, no need to search */
  1222. if (job_resrcs_ptr->nhosts == 1)
  1223. return 0;
  1224. /* Scan bitmap, convert node_inx to node_cnt within job's allocation */
  1225. first_inx = bit_ffs(job_resrcs_ptr->node_bitmap);
  1226. for (i=first_inx, node_offset=-1; i<=node_inx; i++) {
  1227. if (bit_test(job_resrcs_ptr->node_bitmap, i))
  1228. node_offset++;
  1229. }
  1230. if (node_offset >= job_resrcs_ptr->nhosts) {
  1231. error("job_resources_node_inx_to_cpu_inx: "
  1232. "Found %d of %d nodes",
  1233. job_resrcs_ptr->nhosts, node_offset);
  1234. return -1;
  1235. }
  1236. return node_offset;
  1237. }