PageRenderTime 106ms CodeModel.GetById 1ms RepoModel.GetById 2ms app.codeStats 0ms

/src/common/slurm_cred.c

https://github.com/cfenoy/slurm
C | 2328 lines | 1776 code | 388 blank | 164 comment | 250 complexity | 3376e96ee36984bff79dd023fae7a586 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * src/common/slurm_cred.c - SLURM job and sbcast credential functions
  3. *****************************************************************************
  4. * Copyright (C) 2002-2007 The Regents of the University of California.
  5. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  6. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  7. * Written by Morris Jette <jette1@llnl.gov>.
  8. * CODE-OCEC-09-009. All rights reserved.
  9. *
  10. * This file is part of SLURM, a resource management program.
  11. * For details, see <http://www.schedmd.com/slurmdocs/>.
  12. * Please also read the included file: DISCLAIMER.
  13. *
  14. * SLURM is free software; you can redistribute it and/or modify it under
  15. * the terms of the GNU General Public License as published by the Free
  16. * Software Foundation; either version 2 of the License, or (at your option)
  17. * any later version.
  18. *
  19. * In addition, as a special exception, the copyright holders give permission
  20. * to link the code of portions of this program with the OpenSSL library under
  21. * certain conditions as described in each individual source file, and
  22. * distribute linked combinations including the two. You must obey the GNU
  23. * General Public License in all respects for all of the code used other than
  24. * OpenSSL. If you modify file(s) with this exception, you may extend this
  25. * exception to your version of the file(s), but you are not obligated to do
  26. * so. If you do not wish to do so, delete this exception statement from your
  27. * version. If you delete this exception statement from all source files in
  28. * the program, then also delete it here.
  29. *
  30. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  31. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  32. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  33. * details.
  34. *
  35. * You should have received a copy of the GNU General Public License along
  36. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  37. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  38. \*****************************************************************************/
  39. #ifdef HAVE_CONFIG_H
  40. # include "config.h"
  41. #endif
  42. #include <fcntl.h>
  43. #include <stdarg.h>
  44. #include <stdlib.h>
  45. #include <sys/time.h>
  46. #if WITH_PTHREADS
  47. # include <pthread.h>
  48. #endif /* WITH_PTHREADS */
  49. #include "slurm/slurm_errno.h"
  50. #include "src/common/bitstring.h"
  51. #include "src/common/gres.h"
  52. #include "src/common/io_hdr.h"
  53. #include "src/common/job_resources.h"
  54. #include "src/common/list.h"
  55. #include "src/common/log.h"
  56. #include "src/common/macros.h"
  57. #include "src/common/plugin.h"
  58. #include "src/common/plugrack.h"
  59. #include "src/common/slurm_cred.h"
  60. #include "src/common/slurm_protocol_api.h"
  61. #include "src/common/xassert.h"
  62. #include "src/common/xmalloc.h"
  63. #include "src/common/xstring.h"
  /* Forward declaration of the sbcast credential type. The guard macro keeps
   * the typedef from being emitted twice if it was already declared before
   * this point; struct sbcast_cred itself is completed later in this file. */
  64. #ifndef __sbcast_cred_t_defined
  65. # define __sbcast_cred_t_defined
  66. typedef struct sbcast_cred sbcast_cred_t; /* opaque data type */
  67. #endif
  68. /*
  69. * Default credential information expiration window.
  70. * Long enough for loading user environment, running prolog,
  71. * and dealing with the slurmd getting paged out of memory.
  * NOTE(review): presumably expressed in seconds, since a context's
  * expiry_window is added to time(NULL) values elsewhere in this file.
  72. */
  73. #define DEFAULT_EXPIRATION_WINDOW 1200
  74. #define EXTREME_DEBUG 0 /* NOTE(review): not referenced in the visible part of this file */
  75. #define MAX_TIME 0x7fffffff /* sentinel: job_state_t.expiration not yet scheduled */
  76. /*
  77. * slurm job credential state
  78. *
  * Compact record describing one credential (creation time, expiration,
  * job id and step id). Records of this type are what _cred_state_create()
  * returns; see also state_list in struct slurm_cred_context.
  79. */
  80. typedef struct {
  81. time_t ctime; /* Time that the cred was created */
  82. time_t expiration; /* Time at which cred is no longer good */
  83. uint32_t jobid; /* SLURM job id for this credential */
  84. uint32_t stepid; /* SLURM step id for this credential */
  85. } cred_state_t;
  86. /*
  87. * slurm job state information
  88. * tracks jobids for which all future credentials have been revoked
  89. *
  * slurm_cred_revoke() stores the revocation time in `revoked`, and
  * slurm_cred_begin_expiration() replaces an `expiration` of MAX_TIME
  * with time(NULL) plus the context's expiry window.
  90. */
  91. typedef struct {
  92. time_t ctime; /* Time that this entry was created */
  93. time_t expiration; /* Time at which credentials can be purged */
  94. uint32_t jobid; /* SLURM job id for this credential */
  95. time_t revoked; /* Time at which credentials were revoked (0 = not revoked) */
  96. } job_state_t;
  97. /*
  98. * Kind of slurm credential context: a creator context is given a private
  * key and signs credentials, a verifier context is given a public key and
  * checks them (see the two *_ctx_create() functions below).
  99. */
  100. enum ctx_type {
  101. SLURM_CRED_CREATOR,
  102. SLURM_CRED_VERIFIER
  103. };
  104. /*
  105. * slurm sbcast credential state
  106. *
  * Completes the opaque sbcast_cred_t type declared near the top of this
  * file.
  107. */
  108. struct sbcast_cred {
  109. time_t ctime; /* Time that the cred was created */
  110. time_t expiration; /* Time at which cred is no longer good*/
  111. uint32_t jobid; /* SLURM job id for this credential */
  112. char * nodes; /* nodes for which credential is valid */
  113. char *signature; /* credential signature */
  114. unsigned int siglen; /* signature length in bytes */
  115. };
  116. /*
  117. * Credential context, slurm_cred_ctx_t:
  *
  * `magic` exists only in builds without NDEBUG and is checked with
  * xassert() by the functions below. `mutex` is only present when
  * WITH_PTHREADS evaluates true (note: tested with #if here, while
  * struct slurm_job_credential uses #ifdef).
  118. */
  119. struct slurm_cred_context {
  120. #ifndef NDEBUG
  121. # define CRED_CTX_MAGIC 0x0c0c0c
  122. int magic;
  123. #endif
  124. #if WITH_PTHREADS
  125. pthread_mutex_t mutex;
  126. #endif
  127. enum ctx_type type; /* type of context (creator or verifier) */
  128. void *key; /* private or public key */
  129. List job_list; /* List of used jobids (for verifier) */
  130. List state_list; /* List of cred states (for verifier) */
  131. int expiry_window;/* expiration window for cached creds */
  132. void *exkey; /* Old public key if key is updated */
  133. time_t exkey_exp; /* Old key expiration time */
  134. };
  135. /*
  136. * Completion of slurm job credential type, slurm_cred_t:
  *
  * `magic` exists only in builds without NDEBUG. The three arrays
  * cores_per_socket, sockets_per_node and sock_core_rep_count each hold
  * core_array_size entries (they are allocated and copied with that
  * length throughout this file).
  137. */
  138. struct slurm_job_credential {
  139. #ifndef NDEBUG
  140. # define CRED_MAGIC 0x0b0b0b
  141. int magic;
  142. #endif
  143. #ifdef WITH_PTHREADS
  144. pthread_mutex_t mutex;
  145. #endif
  146. uint32_t jobid; /* Job ID associated with this cred */
  147. uint32_t stepid; /* Job step ID for this credential */
  148. uid_t uid; /* user for which this cred is valid */
  149. uint32_t job_mem_limit;/* MB of memory reserved per node OR
  150. * real memory per CPU | MEM_PER_CPU,
  151. * default=0 (no limit) */
  152. uint32_t step_mem_limit; /* NOTE(review): presumably same encoding as job_mem_limit - confirm */
  153. uint16_t core_array_size; /* core/socket array size */
  154. uint16_t *cores_per_socket; /* core_array_size entries */
  155. uint16_t *sockets_per_node; /* core_array_size entries */
  156. uint32_t *sock_core_rep_count; /* core_array_size entries */
  157. List job_gres_list; /* Generic resources allocated to JOB */
  158. List step_gres_list; /* Generic resources allocated to STEP */
  159. bitstr_t *job_core_bitmap;
  160. uint32_t job_nhosts; /* count of nodes allocated to JOB */
  161. char *job_hostlist; /* list of nodes allocated to JOB */
  162. bitstr_t *step_core_bitmap;
  163. time_t ctime; /* time of credential creation */
  164. char *step_hostlist;/* hostnames for which the cred is ok */
  165. char *signature; /* credential signature */
  166. unsigned int siglen; /* signature length in bytes */
  167. };
  168. /*
  169. * WARNING: Do not change the order of these fields or add additional
  170. * fields at the beginning of the structure. If you do, job accounting
  171. * plugins will stop working. If you need to add fields, add them
  172. * at the end of the structure.
  *
  * NOTE(review): the mention of "job accounting plugins" looks copied
  * from another file; this table is handed to plugin_context_create()
  * for the "crypto" plugin type in _slurm_crypto_init(), so the crypto
  * plugins are the ones that depend on this layout - confirm.
  * The field order must match the symbol names in syms[] below.
  173. */
  174. typedef struct slurm_crypto_ops {
  175. void *(*crypto_read_private_key) (const char *path);
  176. void *(*crypto_read_public_key) (const char *path);
  177. void (*crypto_destroy_key) (void *key);
  178. int (*crypto_sign) (void * key, char *buffer,
  179. int buf_size, char **sig_pp,
  180. unsigned int *sig_size_p);
  181. int (*crypto_verify_sign) (void * key, char *buffer,
  182. unsigned int buf_size,
  183. char *signature,
  184. unsigned int sig_size);
  185. const char *(*crypto_str_error) (int);
  186. } slurm_crypto_ops_t;
  187. /*
  188. * These strings must be in the same order as the fields declared
  189. * for slurm_crypto_ops_t.
  * The array (and its size in bytes) is passed to plugin_context_create()
  * together with the address of `ops` in _slurm_crypto_init().
  190. */
  191. static const char *syms[] = {
  192. "crypto_read_private_key",
  193. "crypto_read_public_key",
  194. "crypto_destroy_key",
  195. "crypto_sign",
  196. "crypto_verify_sign",
  197. "crypto_str_error"
  198. };
  /* Element type presumably stored in sbcast_cache_list - TODO confirm
   * against _sbast_cache_add() / _sbcast_cache_del(), which are not visible
   * in this part of the file. */
  199. struct sbcast_cache {
  200. time_t expire; /* NOTE(review): comment used to read "Time that the cred was created", but the field name suggests an expiration time - confirm */
  201. uint32_t value; /* SLURM job id for this credential (NOTE(review): unverified from this part of the file) */
  202. };
  /* File-scope plugin state, set up by _slurm_crypto_init() and torn down
   * by _slurm_crypto_fini(). */
  203. static slurm_crypto_ops_t ops; /* function table handed to plugin_context_create() */
  204. static plugin_context_t *g_context = NULL; /* non-NULL once the crypto plugin context exists */
  205. static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; /* held while creating g_context */
  206. static bool init_run = false; /* set true after context and sbcast cache were created */
  207. static time_t crypto_restart_time = (time_t) 0; /* time(NULL) recorded by the first _slurm_crypto_init() that takes the lock */
  208. static List sbcast_cache_list = NULL; /* created with _sbcast_cache_del as its element destructor */
  209. /*
  210. * Static prototypes:
  * (forward declarations of the file-local helpers; note that
  * _sbast_cache_add is spelled without the 'c' and must match its
  * definition elsewhere in this file)
  211. */
  212. static slurm_cred_ctx_t _slurm_cred_ctx_alloc(void);
  213. static slurm_cred_t * _slurm_cred_alloc(void);
  214. static int _ctx_update_private_key(slurm_cred_ctx_t ctx, const char *path);
  215. static int _ctx_update_public_key(slurm_cred_ctx_t ctx, const char *path);
  216. static bool _exkey_is_valid(slurm_cred_ctx_t ctx);
  217. static cred_state_t * _cred_state_create(slurm_cred_ctx_t ctx, slurm_cred_t *c);
  218. static job_state_t * _job_state_create(uint32_t jobid);
  219. static void _cred_state_destroy(cred_state_t *cs);
  220. static void _job_state_destroy(job_state_t *js);
  221. static job_state_t * _find_job_state(slurm_cred_ctx_t ctx, uint32_t jobid);
  222. static job_state_t * _insert_job_state(slurm_cred_ctx_t ctx, uint32_t jobid);
  223. static int _find_cred_state(cred_state_t *c, slurm_cred_t *cred);
  224. static void _insert_cred_state(slurm_cred_ctx_t ctx, slurm_cred_t *cred);
  225. static void _clear_expired_job_states(slurm_cred_ctx_t ctx);
  226. static void _clear_expired_credential_states(slurm_cred_ctx_t ctx);
  227. static void _verifier_ctx_init(slurm_cred_ctx_t ctx);
  228. static bool _credential_replayed(slurm_cred_ctx_t ctx, slurm_cred_t *cred);
  229. static bool _credential_revoked(slurm_cred_ctx_t ctx, slurm_cred_t *cred);
  230. static int _slurm_cred_sign(slurm_cred_ctx_t ctx, slurm_cred_t *cred);
  231. static int _slurm_cred_verify_signature(slurm_cred_ctx_t ctx, slurm_cred_t *c);
  232. static int _slurm_crypto_init(void);
  233. static int _slurm_crypto_fini(void);
  234. static job_state_t * _job_state_unpack_one(Buf buffer);
  235. static cred_state_t * _cred_state_unpack_one(Buf buffer);
  236. static void _pack_cred(slurm_cred_t *cred, Buf buffer);
  237. static void _job_state_unpack(slurm_cred_ctx_t ctx, Buf buffer);
  238. static void _job_state_pack(slurm_cred_ctx_t ctx, Buf buffer);
  239. static void _cred_state_unpack(slurm_cred_ctx_t ctx, Buf buffer);
  240. static void _cred_state_pack(slurm_cred_ctx_t ctx, Buf buffer);
  241. static void _job_state_pack_one(job_state_t *j, Buf buffer);
  242. static void _cred_state_pack_one(cred_state_t *s, Buf buffer);
  243. static void _sbast_cache_add(sbcast_cred_t *sbcast_cred);
  244. static void _sbcast_cache_del(void *x);
  245. #ifndef DISABLE_LOCALTIME
  246. static char * timestr (const time_t *tp, char *buf, size_t n);
  247. #endif
  248. static int _slurm_crypto_init(void)
  249. {
  250. char *plugin_type = "crypto";
  251. char *type = NULL;
  252. int retval = SLURM_SUCCESS;
  253. if ( init_run && g_context ) /* mostly avoid locks for better speed */
  254. return retval;
  255. slurm_mutex_lock( &g_context_lock );
  256. if (crypto_restart_time == (time_t) 0)
  257. crypto_restart_time = time(NULL);
  258. if ( g_context )
  259. goto done;
  260. type = slurm_get_crypto_type();
  261. g_context = plugin_context_create(
  262. plugin_type, type, (void **)&ops, syms, sizeof(syms));
  263. if (!g_context) {
  264. error("cannot create %s context for %s", plugin_type, type);
  265. retval = SLURM_ERROR;
  266. goto done;
  267. }
  268. sbcast_cache_list = list_create(_sbcast_cache_del);
  269. init_run = true;
  270. done:
  271. slurm_mutex_unlock( &g_context_lock );
  272. xfree(type);
  273. return(retval);
  274. }
  275. static int _slurm_crypto_fini(void)
  276. {
  277. int rc;
  278. if (!g_context)
  279. return SLURM_SUCCESS;
  280. init_run = false;
  281. list_destroy(sbcast_cache_list);
  282. sbcast_cache_list = NULL;
  283. rc = plugin_context_destroy(g_context);
  284. g_context = NULL;
  285. return rc;
  286. }
  287. /* Terminate the plugin and release all memory. */
  288. extern int slurm_crypto_fini(void)
  289. {
  290. if (_slurm_crypto_fini() < 0)
  291. return SLURM_ERROR;
  292. return SLURM_SUCCESS;
  293. }
  294. slurm_cred_ctx_t
  295. slurm_cred_creator_ctx_create(const char *path)
  296. {
  297. slurm_cred_ctx_t ctx = NULL;
  298. if (_slurm_crypto_init() < 0)
  299. return NULL;
  300. ctx = _slurm_cred_ctx_alloc();
  301. slurm_mutex_lock(&ctx->mutex);
  302. ctx->type = SLURM_CRED_CREATOR;
  303. ctx->key = (*(ops.crypto_read_private_key))(path);
  304. if (!ctx->key)
  305. goto fail;
  306. slurm_mutex_unlock(&ctx->mutex);
  307. return ctx;
  308. fail:
  309. slurm_mutex_unlock(&ctx->mutex);
  310. slurm_cred_ctx_destroy(ctx);
  311. error("Can not open data encryption key file %s", path);
  312. return NULL;
  313. }
  314. slurm_cred_ctx_t
  315. slurm_cred_verifier_ctx_create(const char *path)
  316. {
  317. slurm_cred_ctx_t ctx = NULL;
  318. if (_slurm_crypto_init() < 0)
  319. return NULL;
  320. ctx = _slurm_cred_ctx_alloc();
  321. slurm_mutex_lock(&ctx->mutex);
  322. ctx->type = SLURM_CRED_VERIFIER;
  323. ctx->key = (*(ops.crypto_read_public_key))(path);
  324. if (!ctx->key)
  325. goto fail;
  326. _verifier_ctx_init(ctx);
  327. slurm_mutex_unlock(&ctx->mutex);
  328. return ctx;
  329. fail:
  330. slurm_mutex_unlock(&ctx->mutex);
  331. slurm_cred_ctx_destroy(ctx);
  332. error("Can not open data encryption key file %s", path);
  333. return NULL;
  334. }
  335. void
  336. slurm_cred_ctx_destroy(slurm_cred_ctx_t ctx)
  337. {
  338. if (ctx == NULL)
  339. return;
  340. if (_slurm_crypto_init() < 0)
  341. return;
  342. slurm_mutex_lock(&ctx->mutex);
  343. xassert(ctx->magic == CRED_CTX_MAGIC);
  344. if (ctx->exkey)
  345. (*(ops.crypto_destroy_key))(ctx->exkey);
  346. if (ctx->key)
  347. (*(ops.crypto_destroy_key))(ctx->key);
  348. if (ctx->job_list)
  349. list_destroy(ctx->job_list);
  350. if (ctx->state_list)
  351. list_destroy(ctx->state_list);
  352. xassert(ctx->magic = ~CRED_CTX_MAGIC);
  353. slurm_mutex_unlock(&ctx->mutex);
  354. slurm_mutex_destroy(&ctx->mutex);
  355. xfree(ctx);
  356. return;
  357. }
  358. int
  359. slurm_cred_ctx_set(slurm_cred_ctx_t ctx, slurm_cred_opt_t opt, ...)
  360. {
  361. int rc = SLURM_SUCCESS;
  362. va_list ap;
  363. xassert(ctx != NULL);
  364. va_start(ap, opt);
  365. slurm_mutex_lock(&ctx->mutex);
  366. xassert(ctx->magic == CRED_CTX_MAGIC);
  367. switch (opt) {
  368. case SLURM_CRED_OPT_EXPIRY_WINDOW:
  369. ctx->expiry_window = va_arg(ap, int);
  370. break;
  371. default:
  372. slurm_seterrno(EINVAL);
  373. rc = SLURM_ERROR;
  374. break;
  375. }
  376. slurm_mutex_unlock(&ctx->mutex);
  377. va_end(ap);
  378. return rc;
  379. }
  380. int
  381. slurm_cred_ctx_get(slurm_cred_ctx_t ctx, slurm_cred_opt_t opt, ...)
  382. {
  383. int rc = SLURM_SUCCESS;
  384. va_list ap;
  385. int *intp;
  386. xassert(ctx != NULL);
  387. va_start(ap, opt);
  388. slurm_mutex_lock(&ctx->mutex);
  389. xassert(ctx->magic == CRED_CTX_MAGIC);
  390. switch (opt) {
  391. case SLURM_CRED_OPT_EXPIRY_WINDOW:
  392. intp = va_arg(ap, int *);
  393. *intp = ctx->expiry_window;
  394. break;
  395. default:
  396. slurm_seterrno(EINVAL);
  397. rc = SLURM_ERROR;
  398. break;
  399. }
  400. slurm_mutex_unlock(&ctx->mutex);
  401. va_end(ap);
  402. return rc;
  403. }
  404. int
  405. slurm_cred_ctx_key_update(slurm_cred_ctx_t ctx, const char *path)
  406. {
  407. if (_slurm_crypto_init() < 0)
  408. return SLURM_ERROR;
  409. if (ctx->type == SLURM_CRED_CREATOR)
  410. return _ctx_update_private_key(ctx, path);
  411. else
  412. return _ctx_update_public_key(ctx, path);
  413. }
  414. slurm_cred_t *
  415. slurm_cred_create(slurm_cred_ctx_t ctx, slurm_cred_arg_t *arg)
  416. {
  417. slurm_cred_t *cred = NULL;
  418. xassert(ctx != NULL);
  419. xassert(arg != NULL);
  420. if (_slurm_crypto_init() < 0)
  421. return NULL;
  422. cred = _slurm_cred_alloc();
  423. slurm_mutex_lock(&cred->mutex);
  424. xassert(cred->magic == CRED_MAGIC);
  425. cred->jobid = arg->jobid;
  426. cred->stepid = arg->stepid;
  427. cred->uid = arg->uid;
  428. cred->job_gres_list = gres_plugin_job_state_dup(arg->job_gres_list);
  429. cred->step_gres_list = gres_plugin_step_state_dup(arg->step_gres_list);
  430. cred->job_mem_limit = arg->job_mem_limit;
  431. cred->step_mem_limit = arg->step_mem_limit;
  432. cred->step_hostlist = xstrdup(arg->step_hostlist);
  433. #ifndef HAVE_BG
  434. {
  435. int i, sock_recs = 0;
  436. #ifndef HAVE_CRAY
  437. /* Zero compute node allocations allowed on a Cray for use
  438. * of front-end nodes */
  439. xassert(arg->job_nhosts);
  440. #endif
  441. for (i = 0; i < arg->job_nhosts; i++) {
  442. sock_recs += arg->sock_core_rep_count[i];
  443. if (sock_recs >= arg->job_nhosts)
  444. break;
  445. }
  446. i++;
  447. cred->job_core_bitmap = bit_copy(arg->job_core_bitmap);
  448. cred->step_core_bitmap = bit_copy(arg->step_core_bitmap);
  449. cred->core_array_size = i;
  450. cred->cores_per_socket = xmalloc(sizeof(uint16_t) * i);
  451. memcpy(cred->cores_per_socket, arg->cores_per_socket,
  452. (sizeof(uint16_t) * i));
  453. cred->sockets_per_node = xmalloc(sizeof(uint16_t) * i);
  454. memcpy(cred->sockets_per_node, arg->sockets_per_node,
  455. (sizeof(uint16_t) * i));
  456. cred->sock_core_rep_count = xmalloc(sizeof(uint32_t) * i);
  457. memcpy(cred->sock_core_rep_count, arg->sock_core_rep_count,
  458. (sizeof(uint32_t) * i));
  459. cred->job_nhosts = arg->job_nhosts;
  460. cred->job_hostlist = xstrdup(arg->job_hostlist);
  461. }
  462. #endif
  463. cred->ctime = time(NULL);
  464. slurm_mutex_lock(&ctx->mutex);
  465. xassert(ctx->magic == CRED_CTX_MAGIC);
  466. xassert(ctx->type == SLURM_CRED_CREATOR);
  467. if (_slurm_cred_sign(ctx, cred) < 0)
  468. goto fail;
  469. slurm_mutex_unlock(&ctx->mutex);
  470. slurm_mutex_unlock(&cred->mutex);
  471. return cred;
  472. fail:
  473. slurm_mutex_unlock(&ctx->mutex);
  474. slurm_mutex_unlock(&cred->mutex);
  475. slurm_cred_destroy(cred);
  476. return NULL;
  477. }
  478. slurm_cred_t *
  479. slurm_cred_copy(slurm_cred_t *cred)
  480. {
  481. slurm_cred_t *rcred = NULL;
  482. xassert(cred != NULL);
  483. slurm_mutex_lock(&cred->mutex);
  484. rcred = _slurm_cred_alloc();
  485. slurm_mutex_lock(&rcred->mutex);
  486. xassert(rcred->magic == CRED_MAGIC);
  487. rcred->jobid = cred->jobid;
  488. rcred->stepid = cred->stepid;
  489. rcred->uid = cred->uid;
  490. rcred->job_gres_list = gres_plugin_job_state_dup(cred->job_gres_list);
  491. rcred->step_gres_list = gres_plugin_step_state_dup(cred->step_gres_list);
  492. rcred->job_mem_limit = cred->job_mem_limit;
  493. rcred->step_mem_limit = cred->step_mem_limit;
  494. rcred->step_hostlist = xstrdup(cred->step_hostlist);
  495. #ifndef HAVE_BG
  496. rcred->job_core_bitmap = bit_copy(cred->job_core_bitmap);
  497. rcred->step_core_bitmap = bit_copy(cred->step_core_bitmap);
  498. rcred->core_array_size = cred->core_array_size;
  499. rcred->cores_per_socket = xmalloc(sizeof(uint16_t) *
  500. rcred->core_array_size);
  501. memcpy(rcred->cores_per_socket, cred->cores_per_socket,
  502. (sizeof(uint16_t) * rcred->core_array_size));
  503. rcred->sockets_per_node = xmalloc(sizeof(uint16_t) *
  504. rcred->core_array_size);
  505. memcpy(rcred->sockets_per_node, cred->sockets_per_node,
  506. (sizeof(uint16_t) * rcred->core_array_size));
  507. cred->sock_core_rep_count = xmalloc(sizeof(uint32_t) *
  508. rcred->core_array_size);
  509. memcpy(rcred->sock_core_rep_count, cred->sock_core_rep_count,
  510. (sizeof(uint32_t) * rcred->core_array_size));
  511. rcred->job_nhosts = cred->job_nhosts;
  512. rcred->job_hostlist = xstrdup(cred->job_hostlist);
  513. #endif
  514. rcred->ctime = cred->ctime;
  515. rcred->siglen = cred->siglen;
  516. /* Assumes signature is a string,
  517. * otherwise use xmalloc and strcpy here */
  518. rcred->signature = xstrdup(cred->signature);
  519. slurm_mutex_unlock(&cred->mutex);
  520. slurm_mutex_unlock(&rcred->mutex);
  521. return rcred;
  522. }
  523. slurm_cred_t *
  524. slurm_cred_faker(slurm_cred_arg_t *arg)
  525. {
  526. int fd;
  527. slurm_cred_t *cred = NULL;
  528. xassert(arg != NULL);
  529. cred = _slurm_cred_alloc();
  530. slurm_mutex_lock(&cred->mutex);
  531. cred->jobid = arg->jobid;
  532. cred->stepid = arg->stepid;
  533. cred->uid = arg->uid;
  534. cred->job_mem_limit = arg->job_mem_limit;
  535. cred->step_mem_limit = arg->step_mem_limit;
  536. cred->step_hostlist = xstrdup(arg->step_hostlist);
  537. #ifndef HAVE_BG
  538. {
  539. int i, sock_recs = 0;
  540. for (i=0; i<arg->job_nhosts; i++) {
  541. sock_recs += arg->sock_core_rep_count[i];
  542. if (sock_recs >= arg->job_nhosts)
  543. break;
  544. }
  545. i++;
  546. cred->job_core_bitmap = bit_copy(arg->job_core_bitmap);
  547. cred->step_core_bitmap = bit_copy(arg->step_core_bitmap);
  548. cred->core_array_size = i;
  549. cred->cores_per_socket = xmalloc(sizeof(uint16_t) * i);
  550. memcpy(cred->cores_per_socket, arg->cores_per_socket,
  551. (sizeof(uint16_t) * i));
  552. cred->sockets_per_node = xmalloc(sizeof(uint16_t) * i);
  553. memcpy(cred->sockets_per_node, arg->sockets_per_node,
  554. (sizeof(uint16_t) * i));
  555. cred->sock_core_rep_count = xmalloc(sizeof(uint32_t) * i);
  556. memcpy(cred->sock_core_rep_count, arg->sock_core_rep_count,
  557. (sizeof(uint32_t) * i));
  558. cred->job_nhosts = arg->job_nhosts;
  559. cred->job_hostlist = xstrdup(arg->job_hostlist);
  560. }
  561. #endif
  562. cred->ctime = time(NULL);
  563. cred->siglen = SLURM_IO_KEY_SIZE;
  564. cred->signature = xmalloc(cred->siglen * sizeof(char));
  565. if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
  566. if (read(fd, cred->signature, cred->siglen) == -1)
  567. error("reading fake signature from /dev/urandom: %m");
  568. if (close(fd) < 0)
  569. error("close(/dev/urandom): %m");
  570. } else { /* Note: some systems lack this file */
  571. unsigned int i;
  572. struct timeval tv;
  573. gettimeofday(&tv, NULL);
  574. i = (unsigned int) (tv.tv_sec + tv.tv_usec);
  575. srand((unsigned int) i);
  576. for (i=0; i<cred->siglen; i++)
  577. cred->signature[i] = (rand() & 0xff);
  578. }
  579. slurm_mutex_unlock(&cred->mutex);
  580. return cred;
  581. }
  582. void slurm_cred_free_args(slurm_cred_arg_t *arg)
  583. {
  584. FREE_NULL_BITMAP(arg->job_core_bitmap);
  585. FREE_NULL_BITMAP(arg->step_core_bitmap);
  586. xfree(arg->cores_per_socket);
  587. FREE_NULL_LIST(arg->job_gres_list);
  588. FREE_NULL_LIST(arg->step_gres_list);
  589. xfree(arg->step_hostlist);
  590. xfree(arg->job_hostlist);
  591. xfree(arg->sock_core_rep_count);
  592. xfree(arg->sockets_per_node);
  593. }
  594. int slurm_cred_get_args(slurm_cred_t *cred, slurm_cred_arg_t *arg)
  595. {
  596. xassert(cred != NULL);
  597. xassert(arg != NULL);
  598. /*
  599. * set arguments to cred contents
  600. */
  601. slurm_mutex_lock(&cred->mutex);
  602. arg->jobid = cred->jobid;
  603. arg->stepid = cred->stepid;
  604. arg->uid = cred->uid;
  605. arg->job_gres_list = gres_plugin_job_state_dup(cred->job_gres_list);
  606. arg->step_gres_list = gres_plugin_step_state_dup(cred->step_gres_list);
  607. arg->job_mem_limit = cred->job_mem_limit;
  608. arg->step_mem_limit = cred->step_mem_limit;
  609. arg->step_hostlist = xstrdup(cred->step_hostlist);
  610. #ifdef HAVE_BG
  611. arg->job_core_bitmap = NULL;
  612. arg->step_core_bitmap = NULL;
  613. arg->cores_per_socket = NULL;
  614. arg->sockets_per_node = NULL;
  615. arg->sock_core_rep_count = NULL;
  616. arg->job_nhosts = 0;
  617. arg->job_hostlist = NULL;
  618. #else
  619. arg->job_core_bitmap = bit_copy(cred->job_core_bitmap);
  620. arg->step_core_bitmap = bit_copy(cred->step_core_bitmap);
  621. arg->cores_per_socket = xmalloc(sizeof(uint16_t) *
  622. cred->core_array_size);
  623. memcpy(arg->cores_per_socket, cred->cores_per_socket,
  624. (sizeof(uint16_t) * cred->core_array_size));
  625. arg->sockets_per_node = xmalloc(sizeof(uint16_t) *
  626. cred->core_array_size);
  627. memcpy(arg->sockets_per_node, cred->sockets_per_node,
  628. (sizeof(uint16_t) * cred->core_array_size));
  629. arg->sock_core_rep_count = xmalloc(sizeof(uint32_t) *
  630. cred->core_array_size);
  631. memcpy(arg->sock_core_rep_count, cred->sock_core_rep_count,
  632. (sizeof(uint32_t) * cred->core_array_size));
  633. arg->job_nhosts = cred->job_nhosts;
  634. arg->job_hostlist = xstrdup(cred->job_hostlist);
  635. #endif
  636. slurm_mutex_unlock(&cred->mutex);
  637. return SLURM_SUCCESS;
  638. }
  /*
   * Verify a credential against a verifier context and, on success, copy
   * its contents into `arg`.
   *
   * Checks are made in this order, each failure setting a distinct errno:
   *   signature invalid          -> ESLURMD_INVALID_JOB_CREDENTIAL
   *   now > ctime + expiry_window -> ESLURMD_CREDENTIAL_EXPIRED
   *   credential revoked          -> ESLURMD_CREDENTIAL_REVOKED
   *   credential replayed         -> ESLURMD_CREDENTIAL_REPLAYED
   * slurm_cred_handle_reissue() is called between the expiration and the
   * revocation check.
   *
   * IN  ctx  - verifier context (must not be NULL)
   * IN  cred - credential to check (must not be NULL)
   * OUT arg  - receives duplicates of the credential's fields on success
   * RET SLURM_SUCCESS, or SLURM_ERROR with errno set as listed above
   *     (also SLURM_ERROR if the crypto layer cannot be initialized)
   */
  639. int
  640. slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t *cred,
  641. slurm_cred_arg_t *arg)
  642. {
  643. time_t now = time(NULL);
  644. int errnum;
  645. xassert(ctx != NULL);
  646. xassert(cred != NULL);
  647. xassert(arg != NULL);
  648. if (_slurm_crypto_init() < 0)
  649. return SLURM_ERROR;
  /* lock order: context first, then credential */
  650. slurm_mutex_lock(&ctx->mutex);
  651. slurm_mutex_lock(&cred->mutex);
  652. xassert(ctx->magic == CRED_CTX_MAGIC);
  653. xassert(ctx->type == SLURM_CRED_VERIFIER);
  654. xassert(cred->magic == CRED_MAGIC);
  655. /* NOTE: the verification checks that the credential was
  656. * created by SlurmUser or root */
  657. if (_slurm_cred_verify_signature(ctx, cred) < 0) {
  658. slurm_seterrno(ESLURMD_INVALID_JOB_CREDENTIAL);
  659. goto error;
  660. }
  661. if (now > (cred->ctime + ctx->expiry_window)) {
  662. slurm_seterrno(ESLURMD_CREDENTIAL_EXPIRED);
  663. goto error;
  664. }
  665. slurm_cred_handle_reissue(ctx, cred);
  666. if (_credential_revoked(ctx, cred)) {
  667. slurm_seterrno(ESLURMD_CREDENTIAL_REVOKED);
  668. goto error;
  669. }
  670. if (_credential_replayed(ctx, cred)) {
  671. slurm_seterrno(ESLURMD_CREDENTIAL_REPLAYED);
  672. goto error;
  673. }
  /* the context is no longer needed; the credential stays locked while
   * its fields are copied out */
  674. slurm_mutex_unlock(&ctx->mutex);
  675. /*
  676. * set arguments to cred contents
  677. */
  678. arg->jobid = cred->jobid;
  679. arg->stepid = cred->stepid;
  680. arg->uid = cred->uid;
  681. arg->job_gres_list = gres_plugin_job_state_dup(cred->job_gres_list);
  682. arg->step_gres_list = gres_plugin_step_state_dup(cred->step_gres_list);
  683. arg->job_mem_limit = cred->job_mem_limit;
  684. arg->step_mem_limit = cred->step_mem_limit;
  685. arg->step_hostlist = xstrdup(cred->step_hostlist);
  686. #ifdef HAVE_BG
  687. arg->job_core_bitmap = NULL;
  688. arg->step_core_bitmap = NULL;
  689. arg->cores_per_socket = NULL;
  690. arg->sockets_per_node = NULL;
  691. arg->sock_core_rep_count = NULL;
  692. arg->job_nhosts = 0;
  693. arg->job_hostlist = NULL;
  694. #else
  695. arg->job_core_bitmap = bit_copy(cred->job_core_bitmap);
  696. arg->step_core_bitmap = bit_copy(cred->step_core_bitmap);
  697. arg->cores_per_socket = xmalloc(sizeof(uint16_t) *
  698. cred->core_array_size);
  699. memcpy(arg->cores_per_socket, cred->cores_per_socket,
  700. (sizeof(uint16_t) * cred->core_array_size));
  701. arg->sockets_per_node = xmalloc(sizeof(uint16_t) *
  702. cred->core_array_size);
  703. memcpy(arg->sockets_per_node, cred->sockets_per_node,
  704. (sizeof(uint16_t) * cred->core_array_size));
  705. arg->sock_core_rep_count = xmalloc(sizeof(uint32_t) *
  706. cred->core_array_size);
  707. memcpy(arg->sock_core_rep_count, cred->sock_core_rep_count,
  708. (sizeof(uint32_t) * cred->core_array_size));
  709. arg->job_nhosts = cred->job_nhosts;
  710. arg->job_hostlist = xstrdup(cred->job_hostlist);
  711. #endif
  712. slurm_mutex_unlock(&cred->mutex);
  713. return SLURM_SUCCESS;
  714. error:
  /* save errno before unlocking and set it again afterwards, so the
   * caller sees the value chosen above */
  715. errnum = slurm_get_errno();
  716. slurm_mutex_unlock(&ctx->mutex);
  717. slurm_mutex_unlock(&cred->mutex);
  718. slurm_seterrno(errnum);
  719. return SLURM_ERROR;
  720. }
  721. void
  722. slurm_cred_destroy(slurm_cred_t *cred)
  723. {
  724. if (cred == NULL)
  725. return;
  726. xassert(cred->magic == CRED_MAGIC);
  727. slurm_mutex_lock(&cred->mutex);
  728. #ifndef HAVE_BG
  729. FREE_NULL_BITMAP(cred->job_core_bitmap);
  730. FREE_NULL_BITMAP(cred->step_core_bitmap);
  731. xfree(cred->cores_per_socket);
  732. xfree(cred->job_hostlist);
  733. xfree(cred->sock_core_rep_count);
  734. xfree(cred->sockets_per_node);
  735. #endif
  736. FREE_NULL_LIST(cred->job_gres_list);
  737. FREE_NULL_LIST(cred->step_gres_list);
  738. xfree(cred->step_hostlist);
  739. xfree(cred->signature);
  740. xassert(cred->magic = ~CRED_MAGIC);
  741. slurm_mutex_unlock(&cred->mutex);
  742. slurm_mutex_destroy(&cred->mutex);
  743. xfree(cred);
  744. }
  745. bool
  746. slurm_cred_jobid_cached(slurm_cred_ctx_t ctx, uint32_t jobid)
  747. {
  748. bool retval = false;
  749. xassert(ctx != NULL);
  750. xassert(ctx->magic == CRED_CTX_MAGIC);
  751. xassert(ctx->type == SLURM_CRED_VERIFIER);
  752. slurm_mutex_lock(&ctx->mutex);
  753. _clear_expired_job_states(ctx);
  754. /*
  755. * Return true if we find a cached job state for job id `jobid'
  756. */
  757. retval = (_find_job_state(ctx, jobid) != NULL);
  758. slurm_mutex_unlock(&ctx->mutex);
  759. return retval;
  760. }
  761. int
  762. slurm_cred_insert_jobid(slurm_cred_ctx_t ctx, uint32_t jobid)
  763. {
  764. xassert(ctx != NULL);
  765. xassert(ctx->magic == CRED_CTX_MAGIC);
  766. xassert(ctx->type == SLURM_CRED_VERIFIER);
  767. slurm_mutex_lock(&ctx->mutex);
  768. _clear_expired_job_states(ctx);
  769. (void) _insert_job_state(ctx, jobid);
  770. slurm_mutex_unlock(&ctx->mutex);
  771. return SLURM_SUCCESS;
  772. }
  773. int
  774. slurm_cred_rewind(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  775. {
  776. int rc = 0;
  777. xassert(ctx != NULL);
  778. slurm_mutex_lock(&ctx->mutex);
  779. xassert(ctx->magic == CRED_CTX_MAGIC);
  780. xassert(ctx->type == SLURM_CRED_VERIFIER);
  781. rc = list_delete_all(ctx->state_list,
  782. (ListFindF) _find_cred_state, cred);
  783. slurm_mutex_unlock(&ctx->mutex);
  784. return (rc > 0 ? SLURM_SUCCESS : SLURM_FAILURE);
  785. }
  786. int
  787. slurm_cred_revoke(slurm_cred_ctx_t ctx, uint32_t jobid, time_t time,
  788. time_t start_time)
  789. {
  790. job_state_t *j = NULL;
  791. xassert(ctx != NULL);
  792. slurm_mutex_lock(&ctx->mutex);
  793. xassert(ctx->magic == CRED_CTX_MAGIC);
  794. xassert(ctx->type == SLURM_CRED_VERIFIER);
  795. _clear_expired_job_states(ctx);
  796. if (!(j = _find_job_state(ctx, jobid))) {
  797. /*
  798. * This node has not yet seen a job step for this
  799. * job. Insert a job state object so that we can
  800. * revoke any future credentials.
  801. */
  802. j = _insert_job_state(ctx, jobid);
  803. }
  804. if (j->revoked) {
  805. if (start_time && (j->revoked < start_time)) {
  806. debug("job %u requeued, but started no tasks", jobid);
  807. j->expiration = (time_t) MAX_TIME;
  808. } else {
  809. slurm_seterrno(EEXIST);
  810. goto error;
  811. }
  812. }
  813. j->revoked = time;
  814. slurm_mutex_unlock(&ctx->mutex);
  815. return SLURM_SUCCESS;
  816. error:
  817. slurm_mutex_unlock(&ctx->mutex);
  818. return SLURM_FAILURE;
  819. }
  820. int
  821. slurm_cred_begin_expiration(slurm_cred_ctx_t ctx, uint32_t jobid)
  822. {
  823. job_state_t *j = NULL;
  824. xassert(ctx != NULL);
  825. slurm_mutex_lock(&ctx->mutex);
  826. xassert(ctx->magic == CRED_CTX_MAGIC);
  827. xassert(ctx->type == SLURM_CRED_VERIFIER);
  828. _clear_expired_job_states(ctx);
  829. if (!(j = _find_job_state(ctx, jobid))) {
  830. slurm_seterrno(ESRCH);
  831. goto error;
  832. }
  833. if (j->expiration < (time_t) MAX_TIME) {
  834. slurm_seterrno(EEXIST);
  835. goto error;
  836. }
  837. j->expiration = time(NULL) + ctx->expiry_window;
  838. #if DEBUG_TIME
  839. {
  840. char buf[64];
  841. debug2("set revoke expiration for jobid %u to %s",
  842. j->jobid, timestr(&j->expiration, buf, 64));
  843. }
  844. #else
  845. debug2("set revoke expiration for jobid %u to %"PRIu64" UTS",
  846. j->jobid, (uint64_t) j->expiration);
  847. #endif
  848. slurm_mutex_unlock(&ctx->mutex);
  849. return SLURM_SUCCESS;
  850. error:
  851. slurm_mutex_unlock(&ctx->mutex);
  852. return SLURM_ERROR;
  853. }
  854. int
  855. slurm_cred_get_signature(slurm_cred_t *cred, char **datap, uint32_t *datalen)
  856. {
  857. xassert(cred != NULL);
  858. xassert(datap != NULL);
  859. xassert(datalen != NULL);
  860. slurm_mutex_lock(&cred->mutex);
  861. *datap = (char *) cred->signature;
  862. *datalen = cred->siglen;
  863. slurm_mutex_unlock(&cred->mutex);
  864. return SLURM_SUCCESS;
  865. }
  866. #ifndef HAVE_BG
  867. /* Convert bitmap to string representation with brackets removed */
  868. static char *_core_format(bitstr_t *core_bitmap)
  869. {
  870. char str[1024], *bracket_ptr;
  871. bit_fmt(str, sizeof(str), core_bitmap);
  872. if (str[0] != '[')
  873. return xstrdup(str);
  874. /* strip off brackets */
  875. bracket_ptr = strchr(str, ']');
  876. if (bracket_ptr)
  877. bracket_ptr[0] = '\0';
  878. return xstrdup(str+1);
  879. }
  880. #endif
  881. /*
  882. * Retrieve the set of cores that were allocated to the job and step then
  883. * format them in the List Format (e.g., "0-2,7,12-14"). Also return
  884. * job and step's memory limit.
  885. *
  886. * NOTE: caller must xfree the returned strings.
  887. */
  888. void format_core_allocs(slurm_cred_t *cred, char *node_name,
  889. char **job_alloc_cores, char **step_alloc_cores,
  890. uint32_t *job_mem_limit, uint32_t *step_mem_limit)
  891. {
  892. #ifdef HAVE_BG
  893. xassert(cred);
  894. xassert(job_alloc_cores);
  895. xassert(step_alloc_cores);
  896. *job_alloc_cores = NULL;
  897. *step_alloc_cores = NULL;
  898. *job_mem_limit = cred->job_mem_limit & (~MEM_PER_CPU);
  899. if (cred->step_mem_limit)
  900. *step_mem_limit = cred->step_mem_limit & (~MEM_PER_CPU);
  901. else
  902. *step_mem_limit = *job_mem_limit;
  903. #else
  904. bitstr_t *job_core_bitmap, *step_core_bitmap;
  905. hostset_t hset = NULL;
  906. int host_index = -1;
  907. uint32_t i, j, i_first_bit=0, i_last_bit=0;
  908. uint32_t job_core_cnt=0, step_core_cnt=0;
  909. xassert(cred);
  910. xassert(job_alloc_cores);
  911. xassert(step_alloc_cores);
  912. if (!(hset = hostset_create(cred->job_hostlist))) {
  913. error("Unable to create job hostset: `%s'",
  914. cred->job_hostlist);
  915. return;
  916. }
  917. #ifdef HAVE_FRONT_END
  918. host_index = 0;
  919. #else
  920. host_index = hostset_find(hset, node_name);
  921. #endif
  922. if ((host_index < 0) || (host_index >= cred->job_nhosts)) {
  923. error("Invalid host_index %d for job %u",
  924. host_index, cred->jobid);
  925. error("Host %s not in hostlist %s",
  926. node_name, cred->job_hostlist);
  927. hostset_destroy(hset);
  928. return;
  929. }
  930. host_index++; /* change from 0-origin to 1-origin */
  931. for (i=0; host_index; i++) {
  932. if (host_index > cred->sock_core_rep_count[i]) {
  933. i_first_bit += cred->sockets_per_node[i] *
  934. cred->cores_per_socket[i] *
  935. cred->sock_core_rep_count[i];
  936. host_index -= cred->sock_core_rep_count[i];
  937. } else {
  938. i_first_bit += cred->sockets_per_node[i] *
  939. cred->cores_per_socket[i] *
  940. (host_index - 1);
  941. i_last_bit = i_first_bit +
  942. cred->sockets_per_node[i] *
  943. cred->cores_per_socket[i];
  944. break;
  945. }
  946. }
  947. job_core_bitmap = bit_alloc(i_last_bit - i_first_bit);
  948. if (job_core_bitmap == NULL) {
  949. error("bit_alloc malloc failure");
  950. hostset_destroy(hset);
  951. return;
  952. }
  953. step_core_bitmap = bit_alloc(i_last_bit - i_first_bit);
  954. if (step_core_bitmap == NULL) {
  955. error("bit_alloc malloc failure");
  956. FREE_NULL_BITMAP(job_core_bitmap);
  957. hostset_destroy(hset);
  958. return;
  959. }
  960. for (i = i_first_bit, j = 0; i < i_last_bit; i++, j++) {
  961. if (bit_test(cred->job_core_bitmap, i)) {
  962. bit_set(job_core_bitmap, j);
  963. job_core_cnt++;
  964. }
  965. if (bit_test(cred->step_core_bitmap, i)) {
  966. bit_set(step_core_bitmap, j);
  967. step_core_cnt++;
  968. }
  969. }
  970. if (cred->job_mem_limit & MEM_PER_CPU) {
  971. *job_mem_limit = (cred->job_mem_limit & (~MEM_PER_CPU)) *
  972. job_core_cnt;
  973. } else
  974. *job_mem_limit = cred->job_mem_limit;
  975. if (cred->step_mem_limit & MEM_PER_CPU) {
  976. *step_mem_limit = (cred->step_mem_limit & (~MEM_PER_CPU)) *
  977. step_core_cnt;
  978. } else if (cred->step_mem_limit)
  979. *step_mem_limit = cred->step_mem_limit;
  980. else
  981. *step_mem_limit = *job_mem_limit;
  982. *job_alloc_cores = _core_format(job_core_bitmap);
  983. *step_alloc_cores = _core_format(step_core_bitmap);
  984. FREE_NULL_BITMAP(job_core_bitmap);
  985. FREE_NULL_BITMAP(step_core_bitmap);
  986. hostset_destroy(hset);
  987. #endif
  988. }
  989. /*
  990. * Retrieve the job and step generic resources (gres) allocate to this job
  991. * on this node.
  992. *
  993. * NOTE: Caller must destroy the returned lists
  994. */
  995. extern void get_cred_gres(slurm_cred_t *cred, char *node_name,
  996. List *job_gres_list, List *step_gres_list)
  997. {
  998. hostset_t hset = NULL;
  999. int host_index = -1;
  1000. xassert(cred);
  1001. xassert(job_gres_list);
  1002. xassert(step_gres_list);
  1003. *job_gres_list = NULL;
  1004. *step_gres_list = NULL;
  1005. if ((cred->job_gres_list == NULL) && (cred->step_gres_list == NULL))
  1006. return;
  1007. if (!(hset = hostset_create(cred->job_hostlist))) {
  1008. error("Unable to create job hostset: `%s'",
  1009. cred->job_hostlist);
  1010. return;
  1011. }
  1012. #ifdef HAVE_FRONT_END
  1013. host_index = 0;
  1014. #else
  1015. host_index = hostset_find(hset, node_name);
  1016. #endif
  1017. if ((host_index < 0) || (host_index >= cred->job_nhosts)) {
  1018. error("Invalid host_index %d for job %u",
  1019. host_index, cred->jobid);
  1020. error("Host %s not in credential hostlist %s",
  1021. node_name, cred->job_hostlist);
  1022. hostset_destroy(hset);
  1023. return;
  1024. }
  1025. *job_gres_list = gres_plugin_job_state_extract(cred->job_gres_list,
  1026. host_index);
  1027. *step_gres_list = gres_plugin_step_state_extract(cred->step_gres_list,
  1028. host_index);
  1029. return;
  1030. }
  1031. void
  1032. slurm_cred_pack(slurm_cred_t *cred, Buf buffer)
  1033. {
  1034. xassert(cred != NULL);
  1035. xassert(cred->magic == CRED_MAGIC);
  1036. slurm_mutex_lock(&cred->mutex);
  1037. _pack_cred(cred, buffer);
  1038. xassert(cred->siglen > 0);
  1039. packmem(cred->signature, cred->siglen, buffer);
  1040. slurm_mutex_unlock(&cred->mutex);
  1041. return;
  1042. }
  1043. slurm_cred_t *
  1044. slurm_cred_unpack(Buf buffer, uint16_t protocol_version)
  1045. {
  1046. uint32_t cred_uid, len;
  1047. slurm_cred_t *cred = NULL;
  1048. char *bit_fmt = NULL;
  1049. char **sigp;
  1050. uint32_t cluster_flags = slurmdb_setup_cluster_flags();
  1051. xassert(buffer != NULL);
  1052. cred = _slurm_cred_alloc();
  1053. slurm_mutex_lock(&cred->mutex);
  1054. if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) {
  1055. safe_unpack32(&cred->jobid, buffer);
  1056. safe_unpack32(&cred->stepid, buffer);
  1057. safe_unpack32(&cred_uid, buffer);
  1058. cred->uid = cred_uid;
  1059. if (gres_plugin_job_state_unpack(&cred->job_gres_list, buffer,
  1060. cred->jobid, protocol_version)
  1061. != SLURM_SUCCESS)
  1062. goto unpack_error;
  1063. if (gres_plugin_step_state_unpack(&cred->step_gres_list,
  1064. buffer, cred->jobid,
  1065. cred->stepid,
  1066. protocol_version)
  1067. != SLURM_SUCCESS) {
  1068. goto unpack_error;
  1069. }
  1070. safe_unpack32(&cred->job_mem_limit, buffer);
  1071. safe_unpack32(&cred->step_mem_limit, buffer);
  1072. safe_unpackstr_xmalloc(&cred->step_hostlist, &len, buffer);
  1073. safe_unpack_time(&cred->ctime, buffer);
  1074. if (!(cluster_flags & CLUSTER_FLAG_BG)) {
  1075. uint32_t tot_core_cnt;
  1076. safe_unpack32(&tot_core_cnt, buffer);
  1077. safe_unpackstr_xmalloc(&bit_fmt, &len, buffer);
  1078. cred->job_core_bitmap =
  1079. bit_alloc((bitoff_t) tot_core_cnt);
  1080. if (bit_unfmt(cred->job_core_bitmap, bit_fmt))
  1081. goto unpack_error;
  1082. xfree(bit_fmt);
  1083. safe_unpackstr_xmalloc(&bit_fmt, &len, buffer);
  1084. cred->step_core_bitmap =
  1085. bit_alloc((bitoff_t) tot_core_cnt);
  1086. if (bit_unfmt(cred->step_core_bitmap, bit_fmt))
  1087. goto unpack_error;
  1088. xfree(bit_fmt);
  1089. safe_unpack16(&cred->core_array_size, buffer);
  1090. if (cred->core_array_size) {
  1091. safe_unpack16_array(&cred->cores_per_socket,
  1092. &len,
  1093. buffer);
  1094. if (len != cred->core_array_size)
  1095. goto unpack_error;
  1096. safe_unpack16_array(&cred->sockets_per_node,
  1097. &len, buffer);
  1098. if (len != cred->core_array_size)
  1099. goto unpack_error;
  1100. safe_unpack32_array(&cred->sock_core_rep_count,
  1101. &len,
  1102. buffer);
  1103. if (len != cred->core_array_size)
  1104. goto unpack_error;
  1105. }
  1106. safe_unpack32(&cred->job_nhosts, buffer);
  1107. safe_unpackstr_xmalloc(&cred->job_hostlist, &len,
  1108. buffer);
  1109. }
  1110. /* "sigp" must be last */
  1111. sigp = (char **) &cred->signature;
  1112. safe_unpackmem_xmalloc(sigp, &len, buffer);
  1113. cred->siglen = len;
  1114. xassert(len > 0);
  1115. } else {
  1116. error("slurm_cred_unpack: protocol_version"
  1117. " %hu not supported", protocol_version);
  1118. goto unpack_error;
  1119. }
  1120. slurm_mutex_unlock(&cred->mutex);
  1121. return cred;
  1122. unpack_error:
  1123. xfree(bit_fmt);
  1124. slurm_mutex_unlock(&cred->mutex);
  1125. slurm_cred_destroy(cred);
  1126. return NULL;
  1127. }
  1128. int
  1129. slurm_cred_ctx_pack(slurm_cred_ctx_t ctx, Buf buffer)
  1130. {
  1131. slurm_mutex_lock(&ctx->mutex);
  1132. _job_state_pack(ctx, buffer);
  1133. _cred_state_pack(ctx, buffer);
  1134. slurm_mutex_unlock(&ctx->mutex);
  1135. return SLURM_SUCCESS;
  1136. }
  1137. int
  1138. slurm_cred_ctx_unpack(slurm_cred_ctx_t ctx, Buf buffer)
  1139. {
  1140. xassert(ctx != NULL);
  1141. xassert(ctx->magic == CRED_CTX_MAGIC);
  1142. xassert(ctx->type == SLURM_CRED_VERIFIER);
  1143. slurm_mutex_lock(&ctx->mutex);
  1144. /*
  1145. * Unpack job state list and cred state list from buffer
  1146. * appening them onto ctx->state_list and ctx->job_list.
  1147. */
  1148. _job_state_unpack(ctx, buffer);
  1149. _cred_state_unpack(ctx, buffer);
  1150. slurm_mutex_unlock(&ctx->mutex);
  1151. return SLURM_SUCCESS;
  1152. }
  1153. void
  1154. slurm_cred_print(slurm_cred_t *cred)
  1155. {
  1156. if (cred == NULL)
  1157. return;
  1158. slurm_mutex_lock(&cred->mutex);
  1159. xassert(cred->magic == CRED_MAGIC);
  1160. info("Cred: Jobid %u", cred->jobid );
  1161. info("Cred: Stepid %u", cred->stepid );
  1162. info("Cred: UID %u", (uint32_t) cred->uid);
  1163. info("Cred: Job_mem_limit %u", cred->job_mem_limit );
  1164. info("Cred: Step_mem_limit %u", cred->step_mem_limit );
  1165. info("Cred: Step hostlist %s", cred->step_hostlist );
  1166. info("Cred: ctime %s", ctime(&cred->ctime) );
  1167. info("Cred: siglen %u", cred->siglen );
  1168. #ifndef HAVE_BG
  1169. {
  1170. int i;
  1171. char str[128];
  1172. info("Cred: job_core_bitmap %s",
  1173. bit_fmt(str, sizeof(str), cred->job_core_bitmap));
  1174. info("Cred: step_core_bitmap %s",
  1175. bit_fmt(str, sizeof(str), cred->step_core_bitmap));
  1176. info("Cred: sockets_per_node, cores_per_socket, rep_count");
  1177. for (i=0; i<cred->core_array_size; i++) {
  1178. info(" socks:%u cores:%u reps:%u",
  1179. cred->sockets_per_node[i],
  1180. cred->cores_per_socket[i],
  1181. cred->sock_core_rep_count[i]);
  1182. }
  1183. info("Cred: job_nhosts %u", cred->job_nhosts );
  1184. info("Cred: job_hostlist %s", cred->job_hostlist );
  1185. }
  1186. #endif
  1187. slurm_mutex_unlock(&cred->mutex);
  1188. }
  1189. static void
  1190. _verifier_ctx_init(slurm_cred_ctx_t ctx)
  1191. {
  1192. xassert(ctx != NULL);
  1193. xassert(ctx->magic == CRED_CTX_MAGIC);
  1194. xassert(ctx->type == SLURM_CRED_VERIFIER);
  1195. ctx->job_list = list_create((ListDelF) _job_state_destroy);
  1196. ctx->state_list = list_create((ListDelF) _cred_state_destroy);
  1197. return;
  1198. }
  1199. static int
  1200. _ctx_update_private_key(slurm_cred_ctx_t ctx, const char *path)
  1201. {
  1202. void *pk = NULL;
  1203. void *tmpk = NULL;
  1204. xassert(ctx != NULL);
  1205. pk = (*(ops.crypto_read_private_key))(path);
  1206. if (!pk)
  1207. return SLURM_ERROR;
  1208. slurm_mutex_lock(&ctx->mutex);
  1209. xassert(ctx->magic == CRED_CTX_MAGIC);
  1210. xassert(ctx->type == SLURM_CRED_CREATOR);
  1211. tmpk = ctx->key;
  1212. ctx->key = pk;
  1213. slurm_mutex_unlock(&ctx->mutex);
  1214. (*(ops.crypto_destroy_key))(tmpk);
  1215. return SLURM_SUCCESS;
  1216. }
  1217. static int
  1218. _ctx_update_public_key(slurm_cred_ctx_t ctx, const char *path)
  1219. {
  1220. void *pk = NULL;
  1221. xassert(ctx != NULL);
  1222. pk = (*(ops.crypto_read_public_key))(path);
  1223. if (!pk)
  1224. return SLURM_ERROR;
  1225. slurm_mutex_lock(&ctx->mutex);
  1226. xassert(ctx->magic == CRED_CTX_MAGIC);
  1227. xassert(ctx->type == SLURM_CRED_VERIFIER);
  1228. if (ctx->exkey)
  1229. (*(ops.crypto_destroy_key))(ctx->exkey);
  1230. ctx->exkey = ctx->key;
  1231. ctx->key = pk;
  1232. /*
  1233. * exkey expires in expiry_window seconds plus one minute.
  1234. * This should be long enough to capture any keys in-flight.
  1235. */
  1236. ctx->exkey_exp = time(NULL) + ctx->expiry_window + 60;
  1237. slurm_mutex_unlock(&ctx->mutex);
  1238. return SLURM_SUCCESS;
  1239. }
  1240. static bool
  1241. _exkey_is_valid(slurm_cred_ctx_t ctx)
  1242. {
  1243. if (!ctx->exkey)
  1244. return false;
  1245. if (time(NULL) > ctx->exkey_exp) {
  1246. debug2("old job credential key slurmd expired");
  1247. (*(ops.crypto_destroy_key))(ctx->exkey);
  1248. ctx->exkey = NULL;
  1249. return false;
  1250. }
  1251. return true;
  1252. }
  1253. static slurm_cred_ctx_t
  1254. _slurm_cred_ctx_alloc(void)
  1255. {
  1256. slurm_cred_ctx_t ctx = xmalloc(sizeof(*ctx));
  1257. /* Contents initialized to zero */
  1258. slurm_mutex_init(&ctx->mutex);
  1259. slurm_mutex_lock(&ctx->mutex);
  1260. ctx->expiry_window = DEFAULT_EXPIRATION_WINDOW;
  1261. ctx->exkey_exp = (time_t) -1;
  1262. xassert(ctx->magic = CRED_CTX_MAGIC);
  1263. slurm_mutex_unlock(&ctx->mutex);
  1264. return ctx;
  1265. }
  1266. static slurm_cred_t *
  1267. _slurm_cred_alloc(void)
  1268. {
  1269. slurm_cred_t *cred = xmalloc(sizeof(*cred));
  1270. /* Contents initialized to zero */
  1271. slurm_mutex_init(&cred->mutex);
  1272. cred->uid = (uid_t) -1;
  1273. xassert(cred->magic = CRED_MAGIC);
  1274. return cred;
  1275. }
  1276. #if EXTREME_DEBUG
  1277. static void
  1278. _print_data(char *data, int datalen)
  1279. {
  1280. char buf[1024];
  1281. size_t len = 0;
  1282. int i;
  1283. for (i = 0; i < datalen; i += sizeof(char))
  1284. len += sprintf(buf+len, "%02x", data[i]);
  1285. }
  1286. #endif
  1287. static int
  1288. _slurm_cred_sign(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1289. {
  1290. Buf buffer;
  1291. int rc;
  1292. buffer = init_buf(4096);
  1293. _pack_cred(cred, buffer);
  1294. rc = (*(ops.crypto_sign))(ctx->key,
  1295. get_buf_data(buffer),
  1296. get_buf_offset(buffer),
  1297. &cred->signature,
  1298. &cred->siglen);
  1299. free_buf(buffer);
  1300. if (rc) {
  1301. error("Credential sign: %s",
  1302. (*(ops.crypto_str_error))(rc));
  1303. return SLURM_ERROR;
  1304. }
  1305. return SLURM_SUCCESS;
  1306. }
  1307. static int
  1308. _slurm_cred_verify_signature(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1309. {
  1310. Buf buffer;
  1311. int rc;
  1312. debug("Checking credential with %u bytes of sig data", cred->siglen);
  1313. buffer = init_buf(4096);
  1314. _pack_cred(cred, buffer);
  1315. rc = (*(ops.crypto_verify_sign))(ctx->key,
  1316. get_buf_data(buffer),
  1317. get_buf_offset(buffer),
  1318. cred->signature,
  1319. cred->siglen);
  1320. if (rc && _exkey_is_valid(ctx)) {
  1321. rc = (*(ops.crypto_verify_sign))(ctx->exkey,
  1322. get_buf_data(buffer),
  1323. get_buf_offset(buffer),
  1324. cred->signature,
  1325. cred->siglen);
  1326. }
  1327. free_buf(buffer);
  1328. if (rc) {
  1329. error("Credential signature check: %s",
  1330. (*(ops.crypto_str_error))(rc));
  1331. return SLURM_ERROR;
  1332. }
  1333. return SLURM_SUCCESS;
  1334. }
  1335. static void
  1336. _pack_cred(slurm_cred_t *cred, Buf buffer)
  1337. {
  1338. uint32_t cred_uid = (uint32_t) cred->uid;
  1339. pack32(cred->jobid, buffer);
  1340. pack32(cred->stepid, buffer);
  1341. pack32(cred_uid, buffer);
  1342. (void) gres_plugin_job_state_pack(cred->job_gres_list, buffer,
  1343. cred->jobid, false,
  1344. SLURM_PROTOCOL_VERSION);
  1345. gres_plugin_step_state_pack(cred->step_gres_list, buffer,
  1346. cred->jobid, cred->stepid,
  1347. SLURM_PROTOCOL_VERSION);
  1348. pack32(cred->job_mem_limit, buffer);
  1349. pack32(cred->step_mem_limit, buffer);
  1350. packstr(cred->step_hostlist, buffer);
  1351. pack_time(cred->ctime, buffer);
  1352. #ifndef HAVE_BG
  1353. {
  1354. uint32_t tot_core_cnt;
  1355. tot_core_cnt = bit_size(cred->job_core_bitmap);
  1356. pack32(tot_core_cnt, buffer);
  1357. pack_bit_fmt(cred->job_core_bitmap, buffer);
  1358. pack_bit_fmt(cred->step_core_bitmap, buffer);
  1359. pack16(cred->core_array_size, buffer);
  1360. if (cred->core_array_size) {
  1361. pack16_array(cred->cores_per_socket,
  1362. cred->core_array_size,
  1363. buffer);
  1364. pack16_array(cred->sockets_per_node,
  1365. cred->core_array_size,
  1366. buffer);
  1367. pack32_array(cred->sock_core_rep_count,
  1368. cred->core_array_size,
  1369. buffer);
  1370. }
  1371. pack32(cred->job_nhosts, buffer);
  1372. packstr(cred->job_hostlist, buffer);
  1373. }
  1374. #endif
  1375. }
  1376. static bool
  1377. _credential_replayed(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1378. {
  1379. ListIterator i = NULL;
  1380. cred_state_t *s = NULL;
  1381. _clear_expired_credential_states(ctx);
  1382. i = list_iterator_create(ctx->state_list);
  1383. while ((s = list_next(i))) {
  1384. if ((s->jobid == cred->jobid) &&
  1385. (s->stepid == cred->stepid) &&
  1386. (s->ctime == cred->ctime))
  1387. break;
  1388. }
  1389. list_iterator_destroy(i);
  1390. /*
  1391. * If we found a match, this credential is being replayed.
  1392. */
  1393. if (s)
  1394. return true;
  1395. /*
  1396. * Otherwise, save the credential state
  1397. */
  1398. _insert_cred_state(ctx, cred);
  1399. return false;
  1400. }
  1401. #ifdef DISABLE_LOCALTIME
  1402. extern char * timestr (const time_t *tp, char *buf, size_t n)
  1403. #else
  1404. static char * timestr (const time_t *tp, char *buf, size_t n)
  1405. #endif
  1406. {
  1407. char fmt[] = "%y%m%d%H%M%S";
  1408. struct tm tmval;
  1409. #ifdef DISABLE_LOCALTIME
  1410. static int disabled = 0;
  1411. if (buf == NULL)
  1412. disabled = 1;
  1413. if (disabled)
  1414. return NULL;
  1415. #endif
  1416. if (!localtime_r (tp, &tmval))
  1417. error ("localtime_r: %m");
  1418. strftime (buf, n, fmt, &tmval);
  1419. return (buf);
  1420. }
  1421. extern void
  1422. slurm_cred_handle_reissue(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1423. {
  1424. job_state_t *j = _find_job_state(ctx, cred->jobid);
  1425. if (j != NULL && j->revoked && (cred->ctime > j->revoked)) {
  1426. /* The credential has been reissued. Purge the
  1427. * old record so that "cred" will look like a new
  1428. * credential to any ensuing commands. */
  1429. info("reissued job credential for job %u", j->jobid);
  1430. /* Setting j->expiration to zero will make
  1431. * _clear_expired_job_states() remove this
  1432. * job credential from the cred context. */
  1433. j->expiration = 0;
  1434. _clear_expired_job_states(ctx);
  1435. }
  1436. }
  1437. extern bool
  1438. slurm_cred_revoked(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1439. {
  1440. job_state_t *j = _find_job_state(ctx, cred->jobid);
  1441. if ((j == NULL) || (j->revoked == (time_t)0))
  1442. return false;
  1443. if (cred->ctime <= j->revoked)
  1444. return true;
  1445. return false;
  1446. }
  1447. static bool
  1448. _credential_revoked(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1449. {
  1450. job_state_t *j = NULL;
  1451. _clear_expired_job_states(ctx);
  1452. if (!(j = _find_job_state(ctx, cred->jobid))) {
  1453. (void) _insert_job_state(ctx, cred->jobid);
  1454. return false;
  1455. }
  1456. if (cred->ctime <= j->revoked) {
  1457. #if DEBUG_TIME
  1458. char buf[64];
  1459. debug3("cred for %u revoked. expires at %s",
  1460. j->jobid, timestr(&j->expiration, buf, 64));
  1461. #else
  1462. debug3("cred for %u revoked. expires at %"PRIu64" UTS",
  1463. j->jobid, (uint64_t) j->expiration);
  1464. #endif
  1465. return true;
  1466. }
  1467. return false;
  1468. }
  1469. static job_state_t *
  1470. _find_job_state(slurm_cred_ctx_t ctx, uint32_t jobid)
  1471. {
  1472. ListIterator i = NULL;
  1473. job_state_t *j = NULL;
  1474. i = list_iterator_create(ctx->job_list);
  1475. while ((j = list_next(i))) {
  1476. if (j->jobid == jobid)
  1477. break;
  1478. }
  1479. list_iterator_destroy(i);
  1480. return j;
  1481. }
  1482. static int
  1483. _find_cred_state(cred_state_t *c, slurm_cred_t *cred)
  1484. {
  1485. return ((c->jobid == cred->jobid) && (c->stepid == cred->stepid) &&
  1486. (c->ctime == cred->ctime));
  1487. }
  1488. static job_state_t *
  1489. _insert_job_state(slurm_cred_ctx_t ctx, uint32_t jobid)
  1490. {
  1491. job_state_t *j = _job_state_create(jobid);
  1492. list_append(ctx->job_list, j);
  1493. return j;
  1494. }
  1495. static job_state_t *
  1496. _job_state_create(uint32_t jobid)
  1497. {
  1498. job_state_t *j = xmalloc(sizeof(*j));
  1499. j->jobid = jobid;
  1500. j->revoked = (time_t) 0;
  1501. j->ctime = time(NULL);
  1502. j->expiration = (time_t) MAX_TIME;
  1503. return j;
  1504. }
  1505. static void
  1506. _job_state_destroy(job_state_t *j)
  1507. {
  1508. debug3 ("destroying job %u state", j->jobid);
  1509. xfree(j);
  1510. }
  1511. static void
  1512. _clear_expired_job_states(slurm_cred_ctx_t ctx)
  1513. {
  1514. static time_t last_scan = 0;
  1515. time_t now = time(NULL);
  1516. ListIterator i = NULL;
  1517. job_state_t *j = NULL;
  1518. if ((now - last_scan) < 2) /* Reduces slurmd overhead */
  1519. return;
  1520. last_scan = now;
  1521. i = list_iterator_create(ctx->job_list);
  1522. if (!i)
  1523. fatal("list_iterator_create: malloc failure");
  1524. while ((j = list_next(i))) {
  1525. #if DEBUG_TIME
  1526. char t1[64], t2[64], t3[64];
  1527. if (j->revoked) {
  1528. strcpy(t2, " revoked:");
  1529. timestr(&j->revoked, (t2+9), (64-9));
  1530. } else {
  1531. t2[0] = '\0';
  1532. }
  1533. if (j->expiration) {
  1534. strcpy(t3, " expires:");
  1535. timestr(&j->revoked, (t3+9), (64-9));
  1536. } else {
  1537. t3[0] = '\0';
  1538. }
  1539. debug3("state for jobid %u: ctime:%s%s%s",
  1540. j->jobid, timestr(&j->ctime, t1, 64), t2, t3);
  1541. #else
  1542. debug3("state for jobid %u: ctime:%"PRIu64" revoked:%"PRIu64" "
  1543. "expires:%"PRIu64"",
  1544. j->jobid, (uint64_t)j->ctime, (uint64_t)j->revoked,
  1545. (uint64_t)j->revoked);
  1546. #endif
  1547. if (j->revoked && (now > j->expiration)) {
  1548. list_delete_item(i);
  1549. }
  1550. }
  1551. list_iterator_destroy(i);
  1552. }
  1553. static void
  1554. _clear_expired_credential_states(slurm_cred_ctx_t ctx)
  1555. {
  1556. static time_t last_scan = 0;
  1557. time_t now = time(NULL);
  1558. ListIterator i = NULL;
  1559. cred_state_t *s = NULL;
  1560. if ((now - last_scan) < 2) /* Reduces slurmd overhead */
  1561. return;
  1562. last_scan = now;
  1563. i = list_iterator_create(ctx->state_list);
  1564. while ((s = list_next(i))) {
  1565. if (now > s->expiration)
  1566. list_delete_item(i);
  1567. }
  1568. list_iterator_destroy(i);
  1569. }
  1570. static void
  1571. _insert_cred_state(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1572. {
  1573. cred_state_t *s = _cred_state_create(ctx, cred);
  1574. list_append(ctx->state_list, s);
  1575. }
  1576. static cred_state_t *
  1577. _cred_state_create(slurm_cred_ctx_t ctx, slurm_cred_t *cred)
  1578. {
  1579. cred_state_t *s = xmalloc(sizeof(*s));
  1580. s->jobid = cred->jobid;
  1581. s->stepid = cred->stepid;
  1582. s->ctime = cred->ctime;
  1583. s->expiration = cred->ctime + ctx->expiry_window;
  1584. return s;
  1585. }
  1586. static void
  1587. _cred_state_destroy(cred_state_t *s)
  1588. {
  1589. xfree(s);
  1590. }
  1591. static void
  1592. _cred_state_pack_one(cred_state_t *s, Buf buffer)
  1593. {
  1594. pack32(s->jobid, buffer);
  1595. pack32(s->stepid, buffer);
  1596. pack_time(s->ctime, buffer);
  1597. pack_time(s->expiration, buffer);
  1598. }
  1599. static cred_state_t *
  1600. _cred_state_unpack_one(Buf buffer)
  1601. {
  1602. cred_state_t *s = xmalloc(sizeof(*s));
  1603. safe_unpack32(&s->jobid, buffer);
  1604. safe_unpack32(&s->stepid, buffer);
  1605. safe_unpack_time(&s->ctime, buffer);
  1606. safe_unpack_time(&s->expiration, buffer);
  1607. return s;
  1608. unpack_error:
  1609. _cred_state_destroy(s);
  1610. return NULL;
  1611. }
  1612. static void
  1613. _job_state_pack_one(job_state_t *j, Buf buffer)
  1614. {
  1615. pack32(j->jobid, buffer);
  1616. pack_time(j->revoked, buffer);
  1617. pack_time(j->ctime, buffer);
  1618. pack_time(j->expiration, buffer);
  1619. }
  1620. static job_state_t *
  1621. _job_state_unpack_one(Buf buffer)
  1622. {
  1623. char t1[64], t2[64], t3[64];
  1624. job_state_t *j = xmalloc(sizeof(*j));
  1625. safe_unpack32( &j->jobid, buffer);
  1626. safe_unpack_time( &j->revoked, buffer);
  1627. safe_unpack_time( &j->ctime, buffer);
  1628. safe_unpack_time( &j->expiration, buffer);
  1629. if (j->revoked) {
  1630. strcpy(t2, " revoked:");
  1631. timestr(&j->revoked, (t2+9), (64-9));
  1632. } else {
  1633. t2[0] = '\0';
  1634. }
  1635. if (j->expiration) {
  1636. strcpy(t3, " expires:");
  1637. timestr(&j->revoked, (t3+9), (64-9));
  1638. } else {
  1639. t3[0] = '\0';
  1640. }
  1641. debug3("cred_unpack: job %u ctime:%s%s%s",
  1642. j->jobid, timestr (&j->ctime, t1, 64), t2, t3);
  1643. if ((j->revoked) && (j->expiration == (time_t) MAX_TIME)) {
  1644. info ("Warning: revoke on job %u has no expiration",
  1645. j->jobid);
  1646. j->expiration = j->revoked + 600;
  1647. }
  1648. return j;
  1649. unpack_error:
  1650. _job_state_destroy(j);
  1651. return NULL;
  1652. }
  1653. static void
  1654. _cred_state_pack(slurm_cred_ctx_t ctx, Buf buffer)
  1655. {
  1656. ListIterator i = NULL;
  1657. cred_state_t *s = NULL;
  1658. pack32(list_count(ctx->state_list), buffer);
  1659. i = list_iterator_create(ctx->state_list);
  1660. while ((s = list_next(i)))
  1661. _cred_state_pack_one(s, buffer);
  1662. list_iterator_destroy(i);
  1663. }
  1664. static void
  1665. _cred_state_unpack(slurm_cred_ctx_t ctx, Buf buffer)
  1666. {
  1667. time_t now = time(NULL);
  1668. uint32_t n;
  1669. int i = 0;
  1670. cred_state_t *s = NULL;
  1671. safe_unpack32(&n, buffer);
  1672. for (i = 0; i < n; i++) {
  1673. if (!(s = _cred_state_unpack_one(buffer)))
  1674. goto unpack_error;
  1675. if (now < s->expiration)
  1676. list_append(ctx->state_list, s);
  1677. }
  1678. return;
  1679. unpack_error:
  1680. error("Unable to unpack job credential state information");
  1681. return;
  1682. }
  1683. static void
  1684. _job_state_pack(slurm_cred_ctx_t ctx, Buf buffer)
  1685. {
  1686. ListIterator i = NULL;
  1687. job_state_t *j = NULL;
  1688. pack32((uint32_t) list_count(ctx->job_list), buffer);
  1689. i = list_iterator_create(ctx->job_list);
  1690. while ((j = list_next(i)))
  1691. _job_state_pack_one(j, buffer);
  1692. list_iterator_destroy(i);
  1693. }
  1694. static void
  1695. _job_state_unpack(slurm_cred_ctx_t ctx, Buf buffer)
  1696. {
  1697. time_t now = time(NULL);
  1698. uint32_t n = 0;
  1699. int i = 0;
  1700. job_state_t *j = NULL;
  1701. safe_unpack32(&n, buffer);
  1702. for (i = 0; i < n; i++) {
  1703. if (!(j = _job_state_unpack_one(buffer)))
  1704. goto unpack_error;
  1705. if (!j->revoked || (j->revoked && (now < j->expiration)))
  1706. list_append(ctx->job_list, j);
  1707. else {
  1708. debug3 ("not appending expired job %u state",
  1709. j->jobid);
  1710. }
  1711. }
  1712. return;
  1713. unpack_error:
  1714. error("Unable to unpack job state information");
  1715. return;
  1716. }
  1717. /*****************************************************************************\
  1718. ***************** SBCAST CREDENTIAL FUNCTIONS ******************
  1719. \*****************************************************************************/
  1720. /* Pack sbcast credential without the digital signature */
  1721. static void _pack_sbcast_cred(sbcast_cred_t *sbcast_cred, Buf buffer)
  1722. {
  1723. pack_time(sbcast_cred->ctime, buffer);
  1724. pack_time(sbcast_cred->expiration, buffer);
  1725. pack32(sbcast_cred->jobid, buffer);
  1726. packstr(sbcast_cred->nodes, buffer);
  1727. }
  1728. /* Create an sbcast credential for the specified job and nodes
  1729. * including digital signature.
  1730. * RET the sbcast credential or NULL on error */
  1731. sbcast_cred_t *create_sbcast_cred(slurm_cred_ctx_t ctx,
  1732. uint32_t job_id, char *nodes,
  1733. time_t expiration)
  1734. {
  1735. Buf buffer;
  1736. int rc;
  1737. sbcast_cred_t *sbcast_cred;
  1738. time_t now = time(NULL);
  1739. xassert(ctx);
  1740. if (_slurm_crypto_init() < 0)
  1741. return NULL;
  1742. sbcast_cred = xmalloc(sizeof(struct sbcast_cred));
  1743. sbcast_cred->ctime = now;
  1744. sbcast_cred->expiration = expiration;
  1745. sbcast_cred->jobid = job_id;
  1746. sbcast_cred->nodes = xstrdup(nodes);
  1747. buffer = init_buf(4096);
  1748. _pack_sbcast_cred(sbcast_cred, buffer);
  1749. rc = (*(ops.crypto_sign))(
  1750. ctx->key, get_buf_data(buffer), get_buf_offset(buffer),
  1751. &sbcast_cred->signature, &sbcast_cred->siglen);
  1752. free_buf(buffer);
  1753. if (rc) {
  1754. error("sbcast_cred sign: %s",
  1755. (*(ops.crypto_str_error))(rc));
  1756. delete_sbcast_cred(sbcast_cred);
  1757. return NULL;
  1758. }
  1759. return sbcast_cred;
  1760. }
  1761. /* Copy an sbcast credential created using create_sbcast_cred() or
  1762. * unpack_sbcast_cred() */
  1763. sbcast_cred_t *copy_sbcast_cred(sbcast_cred_t *sbcast_cred)
  1764. {
  1765. sbcast_cred_t *rcred = NULL;
  1766. xassert(sbcast_cred);
  1767. rcred->ctime = sbcast_cred->ctime;
  1768. rcred->expiration = sbcast_cred->expiration;
  1769. rcred->jobid = sbcast_cred->jobid;
  1770. rcred->nodes = xstrdup(sbcast_cred->nodes);
  1771. rcred->siglen = sbcast_cred->siglen;
  1772. rcred->signature = xstrdup(sbcast_cred->signature);
  1773. return rcred;
  1774. }
  1775. /* Delete an sbcast credential created using create_sbcast_cred() or
  1776. * unpack_sbcast_cred() */
  1777. void delete_sbcast_cred(sbcast_cred_t *sbcast_cred)
  1778. {
  1779. if (sbcast_cred) {
  1780. xfree(sbcast_cred->nodes);
  1781. xfree(sbcast_cred->signature);
  1782. xfree(sbcast_cred);
  1783. }
  1784. }
  1785. static void _sbast_cache_add(sbcast_cred_t *sbcast_cred)
  1786. {
  1787. int i;
  1788. uint32_t sig_num = 0;
  1789. struct sbcast_cache *new_cache_rec;
  1790. /* Using two bytes at a time gives us a larger number
  1791. * and reduces the possibility of a duplicate value */
  1792. for (i = 0; i < sbcast_cred->siglen; i += 2) {
  1793. sig_num += (sbcast_cred->signature[i] << 8) +
  1794. sbcast_cred->signature[i+1];
  1795. }
  1796. new_cache_rec = xmalloc(sizeof(struct sbcast_cache));
  1797. new_cache_rec->expire = sbcast_cred->expiration;
  1798. new_cache_rec->value = sig_num;
  1799. list_append(sbcast_cache_list, new_cache_rec);
  1800. }
  1801. static void _sbcast_cache_del(void *x)
  1802. {
  1803. xfree(x);
  1804. }
  1805. /* Extract contents of an sbcast credential verifying the digital signature.
  1806. * NOTE: We can only perform the full credential validation once with
  1807. * Munge without generating a credential replay error, so we only
  1808. * verify the credential for block one. All others must have a
  1809. * recent signature on file (in our cache) or the slurmd must have
  1810. * recently been restarted.
  1811. * RET 0 on success, -1 on error */
  1812. int extract_sbcast_cred(slurm_cred_ctx_t ctx,
  1813. sbcast_cred_t *sbcast_cred, uint16_t block_no,
  1814. uint32_t *job_id, char **nodes)
  1815. {
  1816. struct sbcast_cache *next_cache_rec;
  1817. uint32_t sig_num = 0;
  1818. int i, rc;
  1819. time_t now = time(NULL);
  1820. Buf buffer;
  1821. *job_id = 0xffffffff;
  1822. *nodes = NULL;
  1823. xassert(ctx);
  1824. if (_slurm_crypto_init() < 0)
  1825. return -1;
  1826. if (now > sbcast_cred->expiration)
  1827. return -1;
  1828. if (block_no == 1) {
  1829. buffer = init_buf(4096);
  1830. _pack_sbcast_cred(sbcast_cred, buffer);
  1831. /* NOTE: the verification checks that the credential was
  1832. * created by SlurmUser or root */
  1833. rc = (*(ops.crypto_verify_sign)) (
  1834. ctx->key, get_buf_data(buffer), get_buf_offset(buffer),
  1835. sbcast_cred->signature, sbcast_cred->siglen);
  1836. free_buf(buffer);
  1837. if (rc) {
  1838. error("sbcast_cred verify: %s",
  1839. (*(ops.crypto_str_error))(rc));
  1840. return -1;
  1841. }
  1842. _sbast_cache_add(sbcast_cred);
  1843. } else {
  1844. char *err_str = NULL;
  1845. bool cache_match_found = false;
  1846. ListIterator sbcast_iter;
  1847. for (i = 0; i < sbcast_cred->siglen; i += 2) {
  1848. sig_num += (sbcast_cred->signature[i] << 8) +
  1849. sbcast_cred->signature[i+1];
  1850. }
  1851. sbcast_iter = list_iterator_create(sbcast_cache_list);
  1852. if (!sbcast_iter)
  1853. fatal("list_iterator_create: malloc failure");
  1854. while ((next_cache_rec =
  1855. (struct sbcast_cache *) list_next(sbcast_iter))) {
  1856. if ((next_cache_rec->expire == sbcast_cred->expiration) &&
  1857. (next_cache_rec->value == sig_num)) {
  1858. cache_match_found = true;
  1859. break;
  1860. }
  1861. if (next_cache_rec->expire <= now)
  1862. list_delete_item(sbcast_iter);
  1863. }
  1864. list_iterator_destroy(sbcast_iter);
  1865. if (!cache_match_found) {
  1866. error("sbcast_cred verify: signature not in cache");
  1867. if (SLURM_DIFFTIME(now, crypto_restart_time) > 60)
  1868. return -1; /* restarted >60 secs ago */
  1869. buffer = init_buf(4096);
  1870. _pack_sbcast_cred(sbcast_cred, buffer);
  1871. rc = (*(ops.crypto_verify_sign)) (
  1872. ctx->key, get_buf_data(buffer),
  1873. get_buf_offset(buffer),
  1874. sbcast_cred->signature, sbcast_cred->siglen);
  1875. free_buf(buffer);
  1876. if (rc)
  1877. err_str = (char *)(*(ops.crypto_str_error))(rc);
  1878. if (err_str && strcmp(err_str, "Credential replayed")) {
  1879. error("sbcast_cred verify: %s", err_str);
  1880. return -1;
  1881. }
  1882. info("sbcast_cred verify: signature revalidated");
  1883. _sbast_cache_add(sbcast_cred);
  1884. }
  1885. }
  1886. *job_id = sbcast_cred->jobid;
  1887. *nodes = xstrdup(sbcast_cred->nodes);
  1888. return 0;
  1889. }
  1890. /* Pack an sbcast credential into a buffer including the digital signature */
  1891. void pack_sbcast_cred(sbcast_cred_t *sbcast_cred, Buf buffer)
  1892. {
  1893. static int bad_cred_test = -1;
  1894. xassert(sbcast_cred);
  1895. xassert(sbcast_cred->siglen > 0);
  1896. _pack_sbcast_cred(sbcast_cred, buffer);
  1897. if (bad_cred_test == -1) {
  1898. char *sbcast_env = getenv("SLURM_SBCAST_AUTH_FAIL_TEST");
  1899. if (sbcast_env)
  1900. bad_cred_test = atoi(sbcast_env);
  1901. else
  1902. bad_cred_test = 0;
  1903. }
  1904. if (bad_cred_test > 0) {
  1905. int i = ((int) time(NULL)) % sbcast_cred->siglen;
  1906. char save_sig = sbcast_cred->signature[i];
  1907. sbcast_cred->signature[i]++;
  1908. packmem(sbcast_cred->signature, sbcast_cred->siglen, buffer);
  1909. sbcast_cred->signature[i] = save_sig;
  1910. } else {
  1911. packmem(sbcast_cred->signature, sbcast_cred->siglen, buffer);
  1912. }
  1913. }
  1914. /* Pack an sbcast credential into a buffer including the digital signature */
  1915. sbcast_cred_t *unpack_sbcast_cred(Buf buffer)
  1916. {
  1917. uint32_t len;
  1918. sbcast_cred_t *sbcast_cred;
  1919. uint32_t uint32_tmp;
  1920. sbcast_cred = xmalloc(sizeof(struct sbcast_cred));
  1921. safe_unpack_time(&sbcast_cred->ctime, buffer);
  1922. safe_unpack_time(&sbcast_cred->expiration, buffer);
  1923. safe_unpack32(&sbcast_cred->jobid, buffer);
  1924. safe_unpackstr_xmalloc(&sbcast_cred->nodes, &uint32_tmp, buffer);
  1925. /* "sigp" must be last */
  1926. safe_unpackmem_xmalloc(&sbcast_cred->signature, &len, buffer);
  1927. sbcast_cred->siglen = len;
  1928. xassert(len > 0);
  1929. return sbcast_cred;
  1930. unpack_error:
  1931. delete_sbcast_cred(sbcast_cred);
  1932. return NULL;
  1933. }
  1934. void print_sbcast_cred(sbcast_cred_t *sbcast_cred)
  1935. {
  1936. info("Sbcast_cred: Jobid %u", sbcast_cred->jobid );
  1937. info("Sbcast_cred: Nodes %s", sbcast_cred->nodes );
  1938. info("Sbcast_cred: ctime %s", ctime(&sbcast_cred->ctime) );
  1939. info("Sbcast_cred: Expire %s", ctime(&sbcast_cred->expiration) );
  1940. }