PageRenderTime 61ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/src/common/slurm_protocol_api.c

https://github.com/cfenoy/slurm
C | 3803 lines | 2432 code | 469 blank | 902 comment | 444 complexity | 1d3b20f4ed4c4eb5bcf66e63a2d209a7 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * slurm_protocol_api.c - high-level slurm communication functions
  3. *****************************************************************************
  4. * Copyright (C) 2002-2007 The Regents of the University of California.
  5. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  6. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  7. * Written by Kevin Tew <tew1@llnl.gov>, et. al.
  8. * CODE-OCEC-09-009. All rights reserved.
  9. *
  10. * This file is part of SLURM, a resource management program.
  11. * For details, see <http://www.schedmd.com/slurmdocs/>.
  12. * Please also read the included file: DISCLAIMER.
  13. *
  14. * SLURM is free software; you can redistribute it and/or modify it under
  15. * the terms of the GNU General Public License as published by the Free
  16. * Software Foundation; either version 2 of the License, or (at your option)
  17. * any later version.
  18. *
  19. * In addition, as a special exception, the copyright holders give permission
  20. * to link the code of portions of this program with the OpenSSL library under
  21. * certain conditions as described in each individual source file, and
  22. * distribute linked combinations including the two. You must obey the GNU
  23. * General Public License in all respects for all of the code used other than
  24. * OpenSSL. If you modify file(s) with this exception, you may extend this
  25. * exception to your version of the file(s), but you are not obligated to do
  26. * so. If you do not wish to do so, delete this exception statement from your
  27. * version. If you delete this exception statement from all source files in
  28. * the program, then also delete it here.
  29. *
  30. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  31. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  32. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  33. * details.
  34. *
  35. * You should have received a copy of the GNU General Public License along
  36. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  37. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  38. \*****************************************************************************/
  39. #if HAVE_CONFIG_H
  40. # include "config.h"
  41. #endif
  42. /* GLOBAL INCLUDES */
  43. #ifdef WITH_PTHREADS
  44. # include <pthread.h>
  45. #endif /* WITH_PTHREADS */
  46. #include <errno.h>
  47. #include <stdio.h>
  48. #include <stdlib.h>
  49. #include <string.h>
  50. #include <sys/stat.h>
  51. #include <sys/types.h>
  52. #include <time.h>
  53. #include <unistd.h>
  54. #include <ctype.h>
  55. /* PROJECT INCLUDES */
  56. #include "src/common/macros.h"
  57. #include "src/common/pack.h"
  58. #include "src/common/parse_spec.h"
  59. #include "src/common/read_config.h"
  60. #include "src/common/slurm_auth.h"
  61. #include "src/common/slurm_protocol_interface.h"
  62. #include "src/common/slurm_protocol_api.h"
  63. #include "src/common/slurm_protocol_common.h"
  64. #include "src/common/slurm_protocol_pack.h"
  65. #include "src/common/xmalloc.h"
  66. #include "src/common/xstring.h"
  67. #include "src/common/log.h"
  68. #include "src/common/forward.h"
  69. #include "src/slurmdbd/read_config.h"
  70. #include "src/common/slurm_accounting_storage.h"
  71. /* EXTERNAL VARIABLES */
  72. /* #DEFINES */
  73. #define _DEBUG 0 /* non-zero enables the _print_data() declaration guarded by #if _DEBUG below */
  74. #define MAX_SHUTDOWN_RETRY 5 /* NOTE(review): not referenced in the visible part of the file - presumably a retry limit; confirm at its use site */
  75. /* STATIC VARIABLES */
  76. /* static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER; */
  77. static slurm_protocol_config_t proto_conf_default; /* filled in by slurm_api_set_default_config() */
  78. static slurm_protocol_config_t *proto_conf = &proto_conf_default; /* active config; replaced by slurm_set_api_config() */
  79. /* static slurm_ctl_conf_t slurmctld_conf; */
  80. static int message_timeout = -1; /* NOTE(review): use not visible here; -1 presumably means "not yet set" - confirm */
  81. /* STATIC FUNCTIONS (defined later in this file) */
  82. static char *_global_auth_key(void);
  83. static void _remap_slurmctld_errno(void);
  84. static int _unpack_msg_uid(Buf buffer);
  85. #if _DEBUG
  86. static void _print_data(char *data, int len);
  87. #endif
  88. /* slurmdbd configuration pointer; while non-NULL the accessors in this file consult it (or return their defaults) instead of calling slurm_conf_lock() */
  89. slurm_dbd_conf_t *slurmdbd_conf = NULL;
  90. /**********************************************************************\
  91. * protocol configuration functions
  92. \**********************************************************************/
  93. /* slurm_set_api_config
  94. * sets the slurm_protocol_config object
  95. * NOT THREAD SAFE
  96. * IN protocol_conf - slurm_protocol_config object
  97. *
  98. * XXX: Why isn't the "config_lock" mutex used here?
  99. */
  100. int slurm_set_api_config(slurm_protocol_config_t * protocol_conf)
  101. {
  102. proto_conf = protocol_conf;
  103. return SLURM_SUCCESS;
  104. }
  105. /* slurm_get_api_config
  106. * returns a pointer to the current slurm_protocol_config object
  107. * RET slurm_protocol_config_t - current slurm_protocol_config object
  108. */
  109. slurm_protocol_config_t *slurm_get_api_config(void)
  110. {
  111. return proto_conf;
  112. }
  /* slurm_api_set_conf_file
   * Select a non-default slurm configuration file by passing its path to
   * slurm_conf_reinit().
   * IN pathname - pathname of slurm configuration file to be used
   */
  extern void slurm_api_set_conf_file(char *pathname)
  {
      slurm_conf_reinit(pathname);
  }
  122. /* slurm_api_set_default_config
  123. * called by the send_controller_msg function to insure that at least
  124. * the compiled in default slurm_protocol_config object is initialized
  125. * RET int - return code
  126. */
  127. int slurm_api_set_default_config(void)
  128. {
  129. int rc = SLURM_SUCCESS;
  130. slurm_ctl_conf_t *conf;
  131. /*slurm_conf_init(NULL);*/
  132. conf = slurm_conf_lock();
  133. if (conf->control_addr == NULL) {
  134. error("Unable to establish controller machine");
  135. rc = SLURM_ERROR;
  136. goto cleanup;
  137. }
  138. if (conf->slurmctld_port == 0) {
  139. error("Unable to establish controller port");
  140. rc = SLURM_ERROR;
  141. goto cleanup;
  142. }
  143. slurm_set_addr(&proto_conf_default.primary_controller,
  144. conf->slurmctld_port,
  145. conf->control_addr);
  146. if (proto_conf_default.primary_controller.sin_port == 0) {
  147. error("Unable to establish control machine address");
  148. rc = SLURM_ERROR;
  149. goto cleanup;
  150. }
  151. if (conf->backup_addr) {
  152. slurm_set_addr(&proto_conf_default.secondary_controller,
  153. conf->slurmctld_port,
  154. conf->backup_addr);
  155. }
  156. proto_conf = &proto_conf_default;
  157. cleanup:
  158. slurm_conf_unlock();
  159. return rc;
  160. }
  /* slurm_api_clear_config
   * Calls slurm_conf_destroy(); execute this only at program termination
   * to free all memory.
   */
  void slurm_api_clear_config(void)
  {
      slurm_conf_destroy();
  }
  167. /* slurm_get_complete_wait
  168. * RET CompleteWait value from slurm.conf
  169. */
  170. uint16_t slurm_get_complete_wait(void)
  171. {
  172. uint16_t complete_wait = 0;
  173. slurm_ctl_conf_t *conf;
  174. if (slurmdbd_conf) {
  175. } else {
  176. conf = slurm_conf_lock();
  177. complete_wait = conf->complete_wait;
  178. slurm_conf_unlock();
  179. }
  180. return complete_wait;
  181. }
  182. /* update internal configuration data structure as needed.
  183. * exit with lock set */
  184. /* static inline void _lock_update_config() */
  185. /* { */
  186. /* slurm_api_set_default_config(); */
  187. /* slurm_mutex_lock(&config_lock); */
  188. /* } */
  189. /* slurm_get_batch_start_timeout
  190. * RET BatchStartTimeout value from slurm.conf
  191. */
  192. uint16_t slurm_get_batch_start_timeout(void)
  193. {
  194. uint16_t batch_start_timeout = 0;
  195. slurm_ctl_conf_t *conf;
  196. if (slurmdbd_conf) {
  197. } else {
  198. conf = slurm_conf_lock();
  199. batch_start_timeout = conf->batch_start_timeout;
  200. slurm_conf_unlock();
  201. }
  202. return batch_start_timeout;
  203. }
  204. /* slurm_get_suspend_timeout
  205. * RET SuspendTimeout value from slurm.conf
  206. */
  207. uint16_t slurm_get_suspend_timeout(void)
  208. {
  209. uint16_t suspend_timeout = 0;
  210. slurm_ctl_conf_t *conf;
  211. if (slurmdbd_conf) {
  212. } else {
  213. conf = slurm_conf_lock();
  214. suspend_timeout = conf->suspend_timeout;
  215. slurm_conf_unlock();
  216. }
  217. return suspend_timeout;
  218. }
  219. /* slurm_get_resume_timeout
  220. * RET ResumeTimeout value from slurm.conf
  221. */
  222. uint16_t slurm_get_resume_timeout(void)
  223. {
  224. uint16_t resume_timeout = 0;
  225. slurm_ctl_conf_t *conf;
  226. if (slurmdbd_conf) {
  227. } else {
  228. conf = slurm_conf_lock();
  229. resume_timeout = conf->resume_timeout;
  230. slurm_conf_unlock();
  231. }
  232. return resume_timeout;
  233. }
  234. /* slurm_get_suspend_time
  235. * RET SuspendTime value from slurm.conf
  236. */
  237. uint32_t slurm_get_suspend_time(void)
  238. {
  239. uint32_t suspend_time = 0;
  240. slurm_ctl_conf_t *conf;
  241. if (slurmdbd_conf) {
  242. } else {
  243. conf = slurm_conf_lock();
  244. suspend_time = conf->suspend_time;
  245. slurm_conf_unlock();
  246. }
  247. return suspend_time;
  248. }
  249. /* slurm_get_def_mem_per_cpu
  250. * RET DefMemPerCPU/Node value from slurm.conf
  251. */
  252. uint32_t slurm_get_def_mem_per_cpu(void)
  253. {
  254. uint32_t mem_per_cpu = 0;
  255. slurm_ctl_conf_t *conf;
  256. if (slurmdbd_conf) {
  257. } else {
  258. conf = slurm_conf_lock();
  259. mem_per_cpu = conf->def_mem_per_cpu;
  260. slurm_conf_unlock();
  261. }
  262. return mem_per_cpu;
  263. }
  264. /* slurm_get_kill_on_bad_exit
  265. * RET KillOnBadExit value from slurm.conf
  266. */
  267. uint16_t slurm_get_kill_on_bad_exit(void)
  268. {
  269. uint16_t kill_on_bad_exit = 0;
  270. slurm_ctl_conf_t *conf;
  271. if (slurmdbd_conf) {
  272. } else {
  273. conf = slurm_conf_lock();
  274. kill_on_bad_exit = conf->kill_on_bad_exit;
  275. slurm_conf_unlock();
  276. }
  277. return kill_on_bad_exit;
  278. }
  279. /* slurm_get_debug_flags
  280. * RET DebugFlags value from slurm.conf
  281. */
  282. uint32_t slurm_get_debug_flags(void)
  283. {
  284. uint32_t debug_flags = 0;
  285. slurm_ctl_conf_t *conf;
  286. if (slurmdbd_conf) {
  287. } else {
  288. conf = slurm_conf_lock();
  289. debug_flags = conf->debug_flags;
  290. slurm_conf_unlock();
  291. }
  292. return debug_flags;
  293. }
  294. /* slurm_set_debug_flags
  295. */
  296. void slurm_set_debug_flags(uint32_t debug_flags)
  297. {
  298. slurm_ctl_conf_t *conf;
  299. if (slurmdbd_conf) {
  300. } else {
  301. conf = slurm_conf_lock();
  302. conf->debug_flags = debug_flags;
  303. slurm_conf_unlock();
  304. }
  305. }
  306. /* slurm_get_max_mem_per_cpu
  307. * RET MaxMemPerCPU/Node value from slurm.conf
  308. */
  309. uint32_t slurm_get_max_mem_per_cpu(void)
  310. {
  311. uint32_t mem_per_cpu = 0;
  312. slurm_ctl_conf_t *conf;
  313. if (slurmdbd_conf) {
  314. } else {
  315. conf = slurm_conf_lock();
  316. mem_per_cpu = conf->max_mem_per_cpu;
  317. slurm_conf_unlock();
  318. }
  319. return mem_per_cpu;
  320. }
  321. /* slurm_get_epilog_msg_time
  322. * RET EpilogMsgTime value from slurm.conf
  323. */
  324. uint32_t slurm_get_epilog_msg_time(void)
  325. {
  326. uint32_t epilog_msg_time = 0;
  327. slurm_ctl_conf_t *conf;
  328. if (slurmdbd_conf) {
  329. } else {
  330. conf = slurm_conf_lock();
  331. epilog_msg_time = conf->epilog_msg_time;
  332. slurm_conf_unlock();
  333. }
  334. return epilog_msg_time;
  335. }
  336. /* slurm_get_env_timeout
  337. * return default timeout for srun/sbatch --get-user-env option
  338. */
  339. extern int slurm_get_env_timeout(void)
  340. {
  341. int timeout = 0;
  342. slurm_ctl_conf_t *conf;
  343. if (slurmdbd_conf) {
  344. } else {
  345. conf = slurm_conf_lock();
  346. timeout = conf->get_env_timeout;
  347. slurm_conf_unlock();
  348. }
  349. return timeout;
  350. }
  351. /* slurm_get_mpi_default
  352. * get default mpi value from slurmctld_conf object
  353. * RET char * - mpi default value from slurm.conf, MUST be xfreed by caller
  354. */
  355. char *slurm_get_mpi_default(void)
  356. {
  357. char *mpi_default = NULL;
  358. slurm_ctl_conf_t *conf;
  359. if (slurmdbd_conf) {
  360. } else {
  361. conf = slurm_conf_lock();
  362. mpi_default = xstrdup(conf->mpi_default);
  363. slurm_conf_unlock();
  364. }
  365. return mpi_default;
  366. }
  367. /* slurm_get_mpi_params
  368. * get mpi parameters value from slurmctld_conf object
  369. * RET char * - mpi default value from slurm.conf, MUST be xfreed by caller
  370. */
  371. char *slurm_get_mpi_params(void)
  372. {
  373. char *mpi_params = NULL;
  374. slurm_ctl_conf_t *conf;
  375. if (slurmdbd_conf) {
  376. } else {
  377. conf = slurm_conf_lock();
  378. mpi_params = xstrdup(conf->mpi_params);
  379. slurm_conf_unlock();
  380. }
  381. return mpi_params;
  382. }
  383. /* slurm_get_msg_timeout
  384. * get default message timeout value from slurmctld_conf object
  385. */
  386. uint16_t slurm_get_msg_timeout(void)
  387. {
  388. uint16_t msg_timeout = 0;
  389. slurm_ctl_conf_t *conf;
  390. if (slurmdbd_conf) {
  391. msg_timeout = slurmdbd_conf->msg_timeout;
  392. } else {
  393. conf = slurm_conf_lock();
  394. msg_timeout = conf->msg_timeout;
  395. slurm_conf_unlock();
  396. #ifdef MEMORY_LEAK_DEBUG
  397. msg_timeout *= 4;
  398. #endif
  399. }
  400. return msg_timeout;
  401. }
  402. /* slurm_get_plugin_dir
  403. * get plugin directory from slurmctld_conf object
  404. * RET char * - plugin directory, MUST be xfreed by caller
  405. */
  406. char *slurm_get_plugin_dir(void)
  407. {
  408. char *plugin_dir = NULL;
  409. slurm_ctl_conf_t *conf;
  410. if (slurmdbd_conf) {
  411. plugin_dir = xstrdup(slurmdbd_conf->plugindir);
  412. } else {
  413. conf = slurm_conf_lock();
  414. plugin_dir = xstrdup(conf->plugindir);
  415. slurm_conf_unlock();
  416. }
  417. return plugin_dir;
  418. }
  419. /* slurm_get_priority_decay_hl
  420. * returns the priority decay half life in seconds from slurmctld_conf object
  421. * RET uint32_t - decay_hl in secs.
  422. */
  423. uint32_t slurm_get_priority_decay_hl(void)
  424. {
  425. uint32_t priority_hl = NO_VAL;
  426. slurm_ctl_conf_t *conf;
  427. if (slurmdbd_conf) {
  428. } else {
  429. conf = slurm_conf_lock();
  430. priority_hl = conf->priority_decay_hl;
  431. slurm_conf_unlock();
  432. }
  433. return priority_hl;
  434. }
  435. /* slurm_get_priority_calc_period
  436. * returns the seconds between priority decay calculation from slurmctld_conf
  437. * RET uint32_t - calc_period in secs.
  438. */
  439. uint32_t slurm_get_priority_calc_period(void)
  440. {
  441. uint32_t calc_period = NO_VAL;
  442. slurm_ctl_conf_t *conf;
  443. if (slurmdbd_conf) {
  444. } else {
  445. conf = slurm_conf_lock();
  446. calc_period = conf->priority_calc_period;
  447. slurm_conf_unlock();
  448. }
  449. return calc_period;
  450. }
  451. /* slurm_get_priority_favor_small
  452. * returns weither or not we are favoring small jobs from slurmctld_conf object
  453. * RET bool - true if favor small, false else.
  454. */
  455. bool slurm_get_priority_favor_small(void)
  456. {
  457. bool factor = 0;
  458. slurm_ctl_conf_t *conf;
  459. if (slurmdbd_conf) {
  460. } else {
  461. conf = slurm_conf_lock();
  462. factor = conf->priority_favor_small;
  463. slurm_conf_unlock();
  464. }
  465. return factor;
  466. }
  467. /* slurm_get_priority_max_age
  468. * returns the priority age max in seconds from slurmctld_conf object
  469. * RET uint32_t - age_max in secs.
  470. */
  471. uint32_t slurm_get_priority_max_age(void)
  472. {
  473. uint32_t age = NO_VAL;
  474. slurm_ctl_conf_t *conf;
  475. if (slurmdbd_conf) {
  476. } else {
  477. conf = slurm_conf_lock();
  478. age = conf->priority_max_age;
  479. slurm_conf_unlock();
  480. }
  481. return age;
  482. }
  483. /* slurm_get_priority_reset_period
  484. * returns the priority usage reset period from slurmctld_conf object
  485. * RET uint16_t - flag, see PRIORITY_RESET_* in slurm/slurm.h.
  486. */
  487. uint16_t slurm_get_priority_reset_period(void)
  488. {
  489. uint16_t reset_period = (uint16_t) 0;
  490. slurm_ctl_conf_t *conf;
  491. if (slurmdbd_conf) {
  492. } else {
  493. conf = slurm_conf_lock();
  494. reset_period = conf->priority_reset_period;
  495. slurm_conf_unlock();
  496. }
  497. return reset_period;
  498. }
  499. /* slurm_get_priority_type
  500. * returns the priority type from slurmctld_conf object
  501. * RET char * - priority type, MUST be xfreed by caller
  502. */
  503. char *slurm_get_priority_type(void)
  504. {
  505. char *priority_type = NULL;
  506. slurm_ctl_conf_t *conf;
  507. if (slurmdbd_conf) {
  508. } else {
  509. conf = slurm_conf_lock();
  510. priority_type = xstrdup(conf->priority_type);
  511. slurm_conf_unlock();
  512. }
  513. return priority_type;
  514. }
  515. /* slurm_get_priority_weight_age
  516. * returns the priority weight for age from slurmctld_conf object
  517. * RET uint32_t - factor weight.
  518. */
  519. uint32_t slurm_get_priority_weight_age(void)
  520. {
  521. uint32_t factor = NO_VAL;
  522. slurm_ctl_conf_t *conf;
  523. if (slurmdbd_conf) {
  524. } else {
  525. conf = slurm_conf_lock();
  526. factor = conf->priority_weight_age;
  527. slurm_conf_unlock();
  528. }
  529. return factor;
  530. }
  531. /* slurm_get_priority_weight_fairshare
  532. * returns the priority weight for fairshare from slurmctld_conf object
  533. * RET uint32_t - factor weight.
  534. */
  535. uint32_t slurm_get_priority_weight_fairshare(void)
  536. {
  537. uint32_t factor = NO_VAL;
  538. slurm_ctl_conf_t *conf;
  539. if (slurmdbd_conf) {
  540. } else {
  541. conf = slurm_conf_lock();
  542. factor = conf->priority_weight_fs;
  543. slurm_conf_unlock();
  544. }
  545. return factor;
  546. }
  547. /* slurm_get_priority_weight_job_size
  548. * returns the priority weight for job size from slurmctld_conf object
  549. * RET uint32_t - factor weight.
  550. */
  551. uint32_t slurm_get_priority_weight_job_size(void)
  552. {
  553. uint32_t factor = NO_VAL;
  554. slurm_ctl_conf_t *conf;
  555. if (slurmdbd_conf) {
  556. } else {
  557. conf = slurm_conf_lock();
  558. factor = conf->priority_weight_js;
  559. slurm_conf_unlock();
  560. }
  561. return factor;
  562. }
  563. /* slurm_get_priority_weight_partition
  564. * returns the priority weight for partitions from slurmctld_conf object
  565. * RET uint32_t - factor weight.
  566. */
  567. uint32_t slurm_get_priority_weight_partition(void)
  568. {
  569. uint32_t factor = NO_VAL;
  570. slurm_ctl_conf_t *conf;
  571. if (slurmdbd_conf) {
  572. } else {
  573. conf = slurm_conf_lock();
  574. factor = conf->priority_weight_part;
  575. slurm_conf_unlock();
  576. }
  577. return factor;
  578. }
  579. /* slurm_get_priority_weight_qos
  580. * returns the priority weight for QOS from slurmctld_conf object
  581. * RET uint32_t - factor weight.
  582. */
  583. uint32_t slurm_get_priority_weight_qos(void)
  584. {
  585. uint32_t factor = NO_VAL;
  586. slurm_ctl_conf_t *conf;
  587. if (slurmdbd_conf) {
  588. } else {
  589. conf = slurm_conf_lock();
  590. factor = conf->priority_weight_qos;
  591. slurm_conf_unlock();
  592. }
  593. return factor;
  594. }
  595. /* slurm_get_private_data
  596. * get private data from slurmctld_conf object
  597. */
  598. uint16_t slurm_get_private_data(void)
  599. {
  600. uint16_t private_data = 0;
  601. slurm_ctl_conf_t *conf;
  602. if (slurmdbd_conf) {
  603. private_data = slurmdbd_conf->private_data;
  604. } else {
  605. conf = slurm_conf_lock();
  606. private_data = conf->private_data;
  607. slurm_conf_unlock();
  608. }
  609. return private_data;
  610. }
  611. /* slurm_get_state_save_location
  612. * get state_save_location from slurmctld_conf object from slurmctld_conf object
  613. * RET char * - state_save_location directory, MUST be xfreed by caller
  614. */
  615. char *slurm_get_state_save_location(void)
  616. {
  617. char *state_save_loc = NULL;
  618. slurm_ctl_conf_t *conf;
  619. if (slurmdbd_conf) {
  620. } else {
  621. conf = slurm_conf_lock();
  622. state_save_loc = xstrdup(conf->state_save_location);
  623. slurm_conf_unlock();
  624. }
  625. return state_save_loc;
  626. }
  627. /* slurm_get_auth_type
  628. * returns the authentication type from slurmctld_conf object
  629. * RET char * - auth type, MUST be xfreed by caller
  630. */
  631. char *slurm_get_auth_type(void)
  632. {
  633. char *auth_type = NULL;
  634. slurm_ctl_conf_t *conf = NULL;
  635. if (slurmdbd_conf) {
  636. auth_type = xstrdup(slurmdbd_conf->auth_type);
  637. } else {
  638. conf = slurm_conf_lock();
  639. auth_type = xstrdup(conf->authtype);
  640. slurm_conf_unlock();
  641. }
  642. return auth_type;
  643. }
  644. /* slurm_get_checkpoint_type
  645. * returns the checkpoint_type from slurmctld_conf object
  646. * RET char * - checkpoint type, MUST be xfreed by caller
  647. */
  648. extern char *slurm_get_checkpoint_type(void)
  649. {
  650. char *checkpoint_type = NULL;
  651. slurm_ctl_conf_t *conf;
  652. if (slurmdbd_conf) {
  653. } else {
  654. conf = slurm_conf_lock();
  655. checkpoint_type = xstrdup(conf->checkpoint_type);
  656. slurm_conf_unlock();
  657. }
  658. return checkpoint_type;
  659. }
  660. /* slurm_get_cluster_name
  661. * returns the cluster name from slurmctld_conf object
  662. * RET char * - cluster name, MUST be xfreed by caller
  663. */
  664. char *slurm_get_cluster_name(void)
  665. {
  666. char *name = NULL;
  667. slurm_ctl_conf_t *conf;
  668. if (slurmdbd_conf) {
  669. } else {
  670. conf = slurm_conf_lock();
  671. name = xstrdup(conf->cluster_name);
  672. slurm_conf_unlock();
  673. }
  674. return name;
  675. }
  676. /* slurm_get_crypto_type
  677. * returns the crypto_type from slurmctld_conf object
  678. * RET char * - crypto type, MUST be xfreed by caller
  679. */
  680. extern char *slurm_get_crypto_type(void)
  681. {
  682. char *crypto_type = NULL;
  683. slurm_ctl_conf_t *conf;
  684. if (slurmdbd_conf) {
  685. } else {
  686. conf = slurm_conf_lock();
  687. crypto_type = xstrdup(conf->crypto_type);
  688. slurm_conf_unlock();
  689. }
  690. return crypto_type;
  691. }
  692. /* slurm_get_topology_plugin
  693. * returns the value of topology_plugin in slurmctld_conf object
  694. * RET char * - topology type, MUST be xfreed by caller
  695. */
  696. extern char * slurm_get_topology_plugin(void)
  697. {
  698. char *topology_plugin = NULL;
  699. slurm_ctl_conf_t *conf;
  700. if (slurmdbd_conf) {
  701. } else {
  702. conf = slurm_conf_lock();
  703. topology_plugin = xstrdup(conf->topology_plugin);
  704. slurm_conf_unlock();
  705. }
  706. return topology_plugin;
  707. }
  708. /* slurm_get_propagate_prio_process
  709. * return the PropagatePrioProcess flag from slurmctld_conf object
  710. */
  711. extern uint16_t slurm_get_propagate_prio_process(void)
  712. {
  713. uint16_t propagate_prio = 0;
  714. slurm_ctl_conf_t *conf;
  715. if (slurmdbd_conf) {
  716. } else {
  717. conf = slurm_conf_lock();
  718. propagate_prio = conf->propagate_prio_process;
  719. slurm_conf_unlock();
  720. }
  721. return propagate_prio;
  722. }
  723. /* slurm_get_fast_schedule
  724. * returns the value of fast_schedule in slurmctld_conf object
  725. */
  726. extern uint16_t slurm_get_fast_schedule(void)
  727. {
  728. uint16_t fast_val = 0;
  729. slurm_ctl_conf_t *conf;
  730. if (slurmdbd_conf) {
  731. } else {
  732. conf = slurm_conf_lock();
  733. fast_val = conf->fast_schedule;
  734. slurm_conf_unlock();
  735. }
  736. return fast_val;
  737. }
  738. /* slurm_get_track_wckey
  739. * returns the value of track_wckey in slurmctld_conf object
  740. */
  741. extern uint16_t slurm_get_track_wckey(void)
  742. {
  743. uint16_t track_wckey = 0;
  744. slurm_ctl_conf_t *conf;
  745. if (slurmdbd_conf) {
  746. track_wckey = slurmdbd_conf->track_wckey;
  747. } else {
  748. conf = slurm_conf_lock();
  749. track_wckey = conf->track_wckey;
  750. slurm_conf_unlock();
  751. }
  752. return track_wckey;
  753. }
  754. /* slurm_set_tree_width
  755. * sets the value of tree_width in slurmctld_conf object
  756. * RET 0 or error code
  757. */
  758. extern int slurm_set_tree_width(uint16_t tree_width)
  759. {
  760. slurm_ctl_conf_t *conf;
  761. if (slurmdbd_conf) {
  762. } else {
  763. conf = slurm_conf_lock();
  764. if (tree_width == 0) {
  765. error("can't have span count of 0");
  766. return SLURM_ERROR;
  767. }
  768. conf->tree_width = tree_width;
  769. slurm_conf_unlock();
  770. }
  771. return 0;
  772. }
  773. /* slurm_get_tree_width
  774. * returns the value of tree_width in slurmctld_conf object
  775. */
  776. extern uint16_t slurm_get_tree_width(void)
  777. {
  778. uint16_t tree_width = 0;
  779. slurm_ctl_conf_t *conf;
  780. if (slurmdbd_conf) {
  781. } else {
  782. conf = slurm_conf_lock();
  783. tree_width = conf->tree_width;
  784. slurm_conf_unlock();
  785. }
  786. return tree_width;
  787. }
  788. /* slurm_get_vsize_factor
  789. * returns the value of vsize_factor in slurmctld_conf object
  790. */
  791. extern uint16_t slurm_get_vsize_factor(void)
  792. {
  793. uint16_t vsize_factor = 0;
  794. slurm_ctl_conf_t *conf;
  795. if (slurmdbd_conf) {
  796. } else {
  797. conf = slurm_conf_lock();
  798. vsize_factor = conf->vsize_factor;
  799. slurm_conf_unlock();
  800. }
  801. return vsize_factor;
  802. }
  803. /* slurm_set_auth_type
  804. * set the authentication type in slurmctld_conf object
  805. * used for security testing purposes
  806. * RET 0 or error code
  807. */
  808. extern int slurm_set_auth_type(char *auth_type)
  809. {
  810. slurm_ctl_conf_t *conf;
  811. if (slurmdbd_conf) {
  812. xfree(slurmdbd_conf->auth_type);
  813. slurmdbd_conf->auth_type = xstrdup(auth_type);
  814. } else {
  815. conf = slurm_conf_lock();
  816. xfree(conf->authtype);
  817. conf->authtype = xstrdup(auth_type);
  818. slurm_conf_unlock();
  819. }
  820. return 0;
  821. }
  822. /* slurm_get_hash_val
  823. * get hash val of the slurm.conf from slurmctld_conf object from
  824. * slurmctld_conf object
  825. * RET uint32_t - hash_val
  826. */
  827. uint32_t slurm_get_hash_val(void)
  828. {
  829. uint32_t hash_val;
  830. slurm_ctl_conf_t *conf;
  831. if (slurmdbd_conf) {
  832. hash_val = NO_VAL;
  833. } else {
  834. conf = slurm_conf_lock();
  835. hash_val = conf->hash_val;
  836. slurm_conf_unlock();
  837. }
  838. return hash_val;
  839. }
  840. /* slurm_get_health_check_program
  841. * get health_check_program from slurmctld_conf object from
  842. * slurmctld_conf object
  843. * RET char * - health_check_program, MUST be xfreed by caller
  844. */
  845. char *slurm_get_health_check_program(void)
  846. {
  847. char *health_check_program = NULL;
  848. slurm_ctl_conf_t *conf;
  849. if (slurmdbd_conf) {
  850. } else {
  851. conf = slurm_conf_lock();
  852. health_check_program = xstrdup(conf->health_check_program);
  853. slurm_conf_unlock();
  854. }
  855. return health_check_program;
  856. }
  857. /* slurm_get_gres_plugins
  858. * get gres_plugins from slurmctld_conf object from
  859. * slurmctld_conf object
  860. * RET char * - gres_plugins, MUST be xfreed by caller
  861. */
  862. char *slurm_get_gres_plugins(void)
  863. {
  864. char *gres_plugins = NULL;
  865. slurm_ctl_conf_t *conf;
  866. if (slurmdbd_conf) {
  867. } else {
  868. conf = slurm_conf_lock();
  869. gres_plugins = xstrdup(conf->gres_plugins);
  870. slurm_conf_unlock();
  871. }
  872. return gres_plugins;
  873. }
  874. /* slurm_get_job_submit_plugins
  875. * get job_submit_plugins from slurmctld_conf object from
  876. * slurmctld_conf object
  877. * RET char * - job_submit_plugins, MUST be xfreed by caller
  878. */
  879. char *slurm_get_job_submit_plugins(void)
  880. {
  881. char *job_submit_plugins = NULL;
  882. slurm_ctl_conf_t *conf;
  883. if (slurmdbd_conf) {
  884. } else {
  885. conf = slurm_conf_lock();
  886. job_submit_plugins = xstrdup(conf->job_submit_plugins);
  887. slurm_conf_unlock();
  888. }
  889. return job_submit_plugins;
  890. }
  891. /* slurm_get_accounting_storage_type
  892. * returns the accounting storage type from slurmctld_conf object
  893. * RET char * - accounting storage type, MUST be xfreed by caller
  894. */
  895. char *slurm_get_accounting_storage_type(void)
  896. {
  897. char *accounting_type;
  898. slurm_ctl_conf_t *conf;
  899. if (slurmdbd_conf) {
  900. accounting_type = xstrdup(slurmdbd_conf->storage_type);
  901. } else {
  902. conf = slurm_conf_lock();
  903. accounting_type = xstrdup(conf->accounting_storage_type);
  904. slurm_conf_unlock();
  905. }
  906. return accounting_type;
  907. }
  908. /* slurm_get_accounting_storage_user
  909. * returns the storage user from slurmctld_conf object
  910. * RET char * - storage user, MUST be xfreed by caller
  911. */
  912. char *slurm_get_accounting_storage_user(void)
  913. {
  914. char *storage_user;
  915. slurm_ctl_conf_t *conf;
  916. if (slurmdbd_conf) {
  917. storage_user = xstrdup(slurmdbd_conf->storage_user);
  918. } else {
  919. conf = slurm_conf_lock();
  920. storage_user = xstrdup(conf->accounting_storage_user);
  921. slurm_conf_unlock();
  922. }
  923. return storage_user;
  924. }
  925. /* slurm_set_accounting_storage_user
  926. * IN: char *user (name of file or database)
  927. * RET 0 or error code
  928. */
  929. int slurm_set_accounting_storage_user(char *user)
  930. {
  931. slurm_ctl_conf_t *conf;
  932. if (slurmdbd_conf) {
  933. xfree(slurmdbd_conf->storage_user);
  934. slurmdbd_conf->storage_user = xstrdup(user);
  935. } else {
  936. conf = slurm_conf_lock();
  937. xfree(conf->accounting_storage_user);
  938. conf->accounting_storage_user = xstrdup(user);
  939. slurm_conf_unlock();
  940. }
  941. return 0;
  942. }
  943. /* slurm_get_accounting_storage_backup_host
  944. * returns the storage backup host from slurmctld_conf object
  945. * RET char * - storage backup host, MUST be xfreed by caller
  946. */
  947. char *slurm_get_accounting_storage_backup_host(void)
  948. {
  949. char *storage_host;
  950. slurm_ctl_conf_t *conf;
  951. if (slurmdbd_conf) {
  952. storage_host = xstrdup(slurmdbd_conf->storage_backup_host);
  953. } else {
  954. conf = slurm_conf_lock();
  955. storage_host = xstrdup(conf->accounting_storage_backup_host);
  956. slurm_conf_unlock();
  957. }
  958. return storage_host;
  959. }
  960. /* slurm_get_accounting_storage_host
  961. * returns the storage host from slurmctld_conf object
  962. * RET char * - storage host, MUST be xfreed by caller
  963. */
  964. char *slurm_get_accounting_storage_host(void)
  965. {
  966. char *storage_host;
  967. slurm_ctl_conf_t *conf;
  968. if (slurmdbd_conf) {
  969. storage_host = xstrdup(slurmdbd_conf->storage_host);
  970. } else {
  971. conf = slurm_conf_lock();
  972. storage_host = xstrdup(conf->accounting_storage_host);
  973. slurm_conf_unlock();
  974. }
  975. return storage_host;
  976. }
  977. /* slurm_set_accounting_storage_host
  978. * IN: char *host (name of file or database)
  979. * RET 0 or error code
  980. */
  981. int slurm_set_accounting_storage_host(char *host)
  982. {
  983. slurm_ctl_conf_t *conf;
  984. if (slurmdbd_conf) {
  985. xfree(slurmdbd_conf->storage_host);
  986. slurmdbd_conf->storage_host = xstrdup(host);
  987. } else {
  988. conf = slurm_conf_lock();
  989. xfree(conf->accounting_storage_host);
  990. conf->accounting_storage_host = xstrdup(host);
  991. slurm_conf_unlock();
  992. }
  993. return 0;
  994. }
  995. /* slurm_get_accounting_storage_loc
  996. * returns the storage location from slurmctld_conf object
  997. * RET char * - storage location, MUST be xfreed by caller
  998. */
  999. char *slurm_get_accounting_storage_loc(void)
  1000. {
  1001. char *storage_loc;
  1002. slurm_ctl_conf_t *conf;
  1003. if (slurmdbd_conf) {
  1004. storage_loc = xstrdup(slurmdbd_conf->storage_loc);
  1005. } else {
  1006. conf = slurm_conf_lock();
  1007. storage_loc = xstrdup(conf->accounting_storage_loc);
  1008. slurm_conf_unlock();
  1009. }
  1010. return storage_loc;
  1011. }
  1012. /* slurm_set_accounting_storage_loc
  1013. * IN: char *loc (name of file or database)
  1014. * RET 0 or error code
  1015. */
  1016. int slurm_set_accounting_storage_loc(char *loc)
  1017. {
  1018. slurm_ctl_conf_t *conf;
  1019. if (slurmdbd_conf) {
  1020. xfree(slurmdbd_conf->storage_loc);
  1021. slurmdbd_conf->storage_loc = xstrdup(loc);
  1022. } else {
  1023. conf = slurm_conf_lock();
  1024. xfree(conf->accounting_storage_loc);
  1025. conf->accounting_storage_loc = xstrdup(loc);
  1026. slurm_conf_unlock();
  1027. }
  1028. return 0;
  1029. }
  1030. /* slurm_get_accounting_storage_enforce
  1031. * returns what level to enforce associations at
  1032. */
  1033. int slurm_get_accounting_storage_enforce(void)
  1034. {
  1035. int enforce = 0;
  1036. slurm_ctl_conf_t *conf;
  1037. if (slurmdbd_conf) {
  1038. } else {
  1039. conf = slurm_conf_lock();
  1040. enforce = conf->accounting_storage_enforce;
  1041. slurm_conf_unlock();
  1042. }
  1043. return enforce;
  1044. }
  1045. /* slurm_get_is_association_based_accounting
  1046. * returns if we are doing accounting by associations
  1047. */
  1048. int slurm_get_is_association_based_accounting(void)
  1049. {
  1050. int enforce = 0;
  1051. slurm_ctl_conf_t *conf;
  1052. if (slurmdbd_conf) {
  1053. return 1;
  1054. } else {
  1055. conf = slurm_conf_lock();
  1056. if (!strcasecmp(conf->accounting_storage_type,
  1057. "accounting_storage/slurmdbd") ||
  1058. !strcasecmp(conf->accounting_storage_type,
  1059. "accounting_storage/mysql") ||
  1060. !strcasecmp(conf->accounting_storage_type,
  1061. "accounting_storage/pgsql"))
  1062. enforce = 1;
  1063. slurm_conf_unlock();
  1064. }
  1065. return enforce;
  1066. }
  1067. /* slurm_get_accounting_storage_pass
  1068. * returns the storage password from slurmctld_conf object
  1069. * RET char * - storage password, MUST be xfreed by caller
  1070. */
  1071. char *slurm_get_accounting_storage_pass(void)
  1072. {
  1073. char *storage_pass;
  1074. slurm_ctl_conf_t *conf;
  1075. if (slurmdbd_conf) {
  1076. storage_pass = xstrdup(slurmdbd_conf->storage_pass);
  1077. } else {
  1078. conf = slurm_conf_lock();
  1079. storage_pass = xstrdup(conf->accounting_storage_pass);
  1080. slurm_conf_unlock();
  1081. }
  1082. return storage_pass;
  1083. }
  1084. /* _global_auth_key
  1085. * returns the storage password from slurmctld_conf or slurmdbd_conf object
  1086. * cache value in local buffer for best performance
  1087. * RET char * - storage password
  1088. */
  1089. static char *_global_auth_key(void)
  1090. {
  1091. static bool loaded_storage_pass = false;
  1092. static char storage_pass[512] = "\0";
  1093. static char *storage_pass_ptr = NULL;
  1094. slurm_ctl_conf_t *conf;
  1095. if (loaded_storage_pass)
  1096. return storage_pass_ptr;
  1097. if (slurmdbd_conf) {
  1098. if (slurmdbd_conf->auth_info) {
  1099. if (strlen(slurmdbd_conf->auth_info) >
  1100. sizeof(storage_pass))
  1101. fatal("AuthInfo is too long");
  1102. strncpy(storage_pass, slurmdbd_conf->auth_info,
  1103. sizeof(storage_pass));
  1104. storage_pass_ptr = storage_pass;
  1105. }
  1106. } else {
  1107. conf = slurm_conf_lock();
  1108. if (conf->accounting_storage_pass) {
  1109. if (strlen(conf->accounting_storage_pass) >
  1110. sizeof(storage_pass))
  1111. fatal("AccountingStoragePass is too long");
  1112. strncpy(storage_pass, conf->accounting_storage_pass,
  1113. sizeof(storage_pass));
  1114. storage_pass_ptr = storage_pass;
  1115. }
  1116. slurm_conf_unlock();
  1117. }
  1118. loaded_storage_pass = true;
  1119. return storage_pass_ptr;
  1120. }
  1121. /* slurm_get_accounting_storage_port
  1122. * returns the storage port from slurmctld_conf object
  1123. * RET uint32_t - storage port
  1124. */
  1125. uint32_t slurm_get_accounting_storage_port(void)
  1126. {
  1127. uint32_t storage_port;
  1128. slurm_ctl_conf_t *conf;
  1129. if (slurmdbd_conf) {
  1130. storage_port = slurmdbd_conf->storage_port;
  1131. } else {
  1132. conf = slurm_conf_lock();
  1133. storage_port = conf->accounting_storage_port;
  1134. slurm_conf_unlock();
  1135. }
  1136. return storage_port;
  1137. }
  1138. /* slurm_set_accounting_storage_port
  1139. * sets the storage port in slurmctld_conf object
  1140. * RET 0 or error code
  1141. */
  1142. int slurm_set_accounting_storage_port(uint32_t storage_port)
  1143. {
  1144. slurm_ctl_conf_t *conf;
  1145. if (slurmdbd_conf) {
  1146. slurmdbd_conf->storage_port = storage_port;
  1147. } else {
  1148. conf = slurm_conf_lock();
  1149. if (storage_port == 0) {
  1150. error("can't have storage port of 0");
  1151. return SLURM_ERROR;
  1152. }
  1153. conf->accounting_storage_port = storage_port;
  1154. slurm_conf_unlock();
  1155. }
  1156. return 0;
  1157. }
  1158. /* slurm_get_preempt_mode
  1159. * returns the PreemptMode value from slurmctld_conf object
  1160. * RET uint16_t - PreemptMode value (See PREEMPT_MODE_* in slurm.h)
  1161. */
  1162. uint16_t slurm_get_preempt_mode(void)
  1163. {
  1164. uint16_t preempt_mode = 0;
  1165. slurm_ctl_conf_t *conf;
  1166. if (slurmdbd_conf) {
  1167. } else {
  1168. conf = slurm_conf_lock();
  1169. preempt_mode = conf->preempt_mode;
  1170. slurm_conf_unlock();
  1171. }
  1172. return preempt_mode;
  1173. }
  1174. /* slurm_get_jobacct_gather_type
  1175. * returns the job accounting type from the slurmctld_conf object
  1176. * RET char * - job accounting type, MUST be xfreed by caller
  1177. */
  1178. char *slurm_get_jobacct_gather_type(void)
  1179. {
  1180. char *jobacct_type = NULL;
  1181. slurm_ctl_conf_t *conf;
  1182. if (slurmdbd_conf) {
  1183. } else {
  1184. conf = slurm_conf_lock();
  1185. jobacct_type = xstrdup(conf->job_acct_gather_type);
  1186. slurm_conf_unlock();
  1187. }
  1188. return jobacct_type;
  1189. }
  1190. /* slurm_get_jobacct_freq
  1191. * returns the job accounting poll frequency from the slurmctld_conf object
  1192. * RET int - job accounting frequency
  1193. */
  1194. uint16_t slurm_get_jobacct_gather_freq(void)
  1195. {
  1196. uint16_t freq = 0;
  1197. slurm_ctl_conf_t *conf;
  1198. if (slurmdbd_conf) {
  1199. } else {
  1200. conf = slurm_conf_lock();
  1201. freq = conf->job_acct_gather_freq;
  1202. slurm_conf_unlock();
  1203. }
  1204. return freq;
  1205. }
  1206. /* slurm_get_energy_accounting_type
  1207. * get EnergyAccountingType from slurmctld_conf object
  1208. * RET char * - energy_accounting type, MUST be xfreed by caller
  1209. */
  1210. char *slurm_get_acct_gather_energy_type(void)
  1211. {
  1212. char *acct_gather_energy_type = NULL;
  1213. slurm_ctl_conf_t *conf;
  1214. if (slurmdbd_conf) {
  1215. } else {
  1216. conf = slurm_conf_lock();
  1217. acct_gather_energy_type =
  1218. xstrdup(conf->acct_gather_energy_type);
  1219. slurm_conf_unlock();
  1220. }
  1221. return acct_gather_energy_type;
  1222. }
  1223. extern uint16_t slurm_get_acct_gather_node_freq(void)
  1224. {
  1225. uint16_t freq = 0;
  1226. slurm_ctl_conf_t *conf;
  1227. if (slurmdbd_conf) {
  1228. } else {
  1229. conf = slurm_conf_lock();
  1230. freq = conf->acct_gather_node_freq;
  1231. slurm_conf_unlock();
  1232. }
  1233. return freq;
  1234. }
  1235. /* slurm_get_jobcomp_type
  1236. * returns the job completion logger type from slurmctld_conf object
  1237. * RET char * - job completion type, MUST be xfreed by caller
  1238. */
  1239. char *slurm_get_jobcomp_type(void)
  1240. {
  1241. char *jobcomp_type = NULL;
  1242. slurm_ctl_conf_t *conf;
  1243. if (slurmdbd_conf) {
  1244. } else {
  1245. conf = slurm_conf_lock();
  1246. jobcomp_type = xstrdup(conf->job_comp_type);
  1247. slurm_conf_unlock();
  1248. }
  1249. return jobcomp_type;
  1250. }
  1251. /* slurm_get_jobcomp_loc
  1252. * returns the job completion loc from slurmctld_conf object
  1253. * RET char * - job completion location, MUST be xfreed by caller
  1254. */
  1255. char *slurm_get_jobcomp_loc(void)
  1256. {
  1257. char *jobcomp_loc = 0;
  1258. slurm_ctl_conf_t *conf;
  1259. if (slurmdbd_conf) {
  1260. } else {
  1261. conf = slurm_conf_lock();
  1262. jobcomp_loc = xstrdup(conf->job_comp_loc);
  1263. slurm_conf_unlock();
  1264. }
  1265. return jobcomp_loc;
  1266. }
  1267. /* slurm_get_jobcomp_user
  1268. * returns the storage user from slurmctld_conf object
  1269. * RET char * - storage user, MUST be xfreed by caller
  1270. */
  1271. char *slurm_get_jobcomp_user(void)
  1272. {
  1273. char *storage_user = NULL;
  1274. slurm_ctl_conf_t *conf;
  1275. if (slurmdbd_conf) {
  1276. } else {
  1277. conf = slurm_conf_lock();
  1278. storage_user = xstrdup(conf->job_comp_user);
  1279. slurm_conf_unlock();
  1280. }
  1281. return storage_user;
  1282. }
  1283. /* slurm_get_jobcomp_host
  1284. * returns the storage host from slurmctld_conf object
  1285. * RET char * - storage host, MUST be xfreed by caller
  1286. */
  1287. char *slurm_get_jobcomp_host(void)
  1288. {
  1289. char *storage_host = NULL;
  1290. slurm_ctl_conf_t *conf;
  1291. if (slurmdbd_conf) {
  1292. } else {
  1293. conf = slurm_conf_lock();
  1294. storage_host = xstrdup(conf->job_comp_host);
  1295. slurm_conf_unlock();
  1296. }
  1297. return storage_host;
  1298. }
  1299. /* slurm_get_jobcomp_pass
  1300. * returns the storage password from slurmctld_conf object
  1301. * RET char * - storage password, MUST be xfreed by caller
  1302. */
  1303. char *slurm_get_jobcomp_pass(void)
  1304. {
  1305. char *storage_pass = NULL;
  1306. slurm_ctl_conf_t *conf;
  1307. if (slurmdbd_conf) {
  1308. } else {
  1309. conf = slurm_conf_lock();
  1310. storage_pass = xstrdup(conf->job_comp_pass);
  1311. slurm_conf_unlock();
  1312. }
  1313. return storage_pass;
  1314. }
  1315. /* slurm_get_jobcomp_port
  1316. * returns the storage port from slurmctld_conf object
  1317. * RET uint32_t - storage port
  1318. */
  1319. uint32_t slurm_get_jobcomp_port(void)
  1320. {
  1321. uint32_t storage_port = 0;
  1322. slurm_ctl_conf_t *conf;
  1323. if (slurmdbd_conf) {
  1324. } else {
  1325. conf = slurm_conf_lock();
  1326. storage_port = conf->job_comp_port;
  1327. slurm_conf_unlock();
  1328. }
  1329. return storage_port;
  1330. }
  1331. /* slurm_set_jobcomp_port
  1332. * sets the jobcomp port in slurmctld_conf object
  1333. * RET 0 or error code
  1334. */
  1335. int slurm_set_jobcomp_port(uint32_t port)
  1336. {
  1337. slurm_ctl_conf_t *conf;
  1338. if (slurmdbd_conf) {
  1339. } else {
  1340. conf = slurm_conf_lock();
  1341. if (port == 0) {
  1342. error("can't have jobcomp port of 0");
  1343. return SLURM_ERROR;
  1344. }
  1345. conf->job_comp_port = port;
  1346. slurm_conf_unlock();
  1347. }
  1348. return 0;
  1349. }
  1350. /* slurm_get_kill_wait
  1351. * returns kill_wait from slurmctld_conf object
  1352. * RET uint16_t - kill_wait
  1353. */
  1354. uint16_t slurm_get_kill_wait(void)
  1355. {
  1356. uint16_t kill_wait = 0;
  1357. slurm_ctl_conf_t *conf;
  1358. if (slurmdbd_conf) {
  1359. } else {
  1360. conf = slurm_conf_lock();
  1361. kill_wait = conf->kill_wait;
  1362. slurm_conf_unlock();
  1363. }
  1364. return kill_wait;
  1365. }
  1366. /* slurm_get_launch_type
  1367. * get launch_type from slurmctld_conf object
  1368. * RET char * - launch_type, MUST be xfreed by caller
  1369. */
  1370. char *slurm_get_launch_type(void)
  1371. {
  1372. char *launch_type = NULL;
  1373. slurm_ctl_conf_t *conf;
  1374. if (slurmdbd_conf) {
  1375. } else {
  1376. conf = slurm_conf_lock();
  1377. launch_type = xstrdup(conf->launch_type);
  1378. slurm_conf_unlock();
  1379. }
  1380. return launch_type;
  1381. }
  1382. /* slurm_set_launch_type
  1383. * set launch_type in slurmctld_conf object
  1384. * RET 0 or error code
  1385. */
  1386. int slurm_set_launch_type(char *launch_type)
  1387. {
  1388. slurm_ctl_conf_t *conf;
  1389. if (slurmdbd_conf) {
  1390. } else {
  1391. conf = slurm_conf_lock();
  1392. xfree(conf->launch_type);
  1393. conf->launch_type = xstrdup(launch_type);
  1394. slurm_conf_unlock();
  1395. }
  1396. return 0;
  1397. }
  1398. /* slurm_get_preempt_type
  1399. * get PreemptType from slurmctld_conf object
  1400. * RET char * - preempt type, MUST be xfreed by caller
  1401. */
  1402. char *slurm_get_preempt_type(void)
  1403. {
  1404. char *preempt_type = NULL;
  1405. slurm_ctl_conf_t *conf;
  1406. if (slurmdbd_conf) {
  1407. } else {
  1408. conf = slurm_conf_lock();
  1409. preempt_type = xstrdup(conf->preempt_type);
  1410. slurm_conf_unlock();
  1411. }
  1412. return preempt_type;
  1413. }
  1414. /* slurm_get_proctrack_type
  1415. * get ProctrackType from slurmctld_conf object
  1416. * RET char * - proctrack type, MUST be xfreed by caller
  1417. */
  1418. char *slurm_get_proctrack_type(void)
  1419. {
  1420. char *proctrack_type = NULL;
  1421. slurm_ctl_conf_t *conf;
  1422. if (slurmdbd_conf) {
  1423. } else {
  1424. conf = slurm_conf_lock();
  1425. proctrack_type = xstrdup(conf->proctrack_type);
  1426. slurm_conf_unlock();
  1427. }
  1428. return proctrack_type;
  1429. }
  1430. /* slurm_get_slurmd_port
  1431. * returns slurmd port from slurmctld_conf object
  1432. * RET uint16_t - slurmd port
  1433. */
  1434. uint16_t slurm_get_slurmd_port(void)
  1435. {
  1436. uint16_t slurmd_port = 0;
  1437. slurm_ctl_conf_t *conf;
  1438. if (slurmdbd_conf) {
  1439. } else {
  1440. conf = slurm_conf_lock();
  1441. slurmd_port = conf->slurmd_port;
  1442. slurm_conf_unlock();
  1443. }
  1444. return slurmd_port;
  1445. }
  1446. /* slurm_get_slurm_user_id
  1447. * returns slurm uid from slurmctld_conf object
  1448. * RET uint32_t - slurm user id
  1449. */
  1450. uint32_t slurm_get_slurm_user_id(void)
  1451. {
  1452. uint32_t slurm_uid = 0;
  1453. slurm_ctl_conf_t *conf;
  1454. if (slurmdbd_conf) {
  1455. slurm_uid = slurmdbd_conf->slurm_user_id;
  1456. } else {
  1457. conf = slurm_conf_lock();
  1458. slurm_uid = conf->slurm_user_id;
  1459. slurm_conf_unlock();
  1460. }
  1461. return slurm_uid;
  1462. }
  1463. /* slurm_get_slurmd_user_id
  1464. * returns slurmd uid from slurmctld_conf object
  1465. * RET uint32_t - slurmd user id
  1466. */
  1467. uint32_t slurm_get_slurmd_user_id(void)
  1468. {
  1469. uint32_t slurmd_uid = 0;
  1470. slurm_ctl_conf_t *conf;
  1471. if (slurmdbd_conf) {
  1472. } else {
  1473. conf = slurm_conf_lock();
  1474. slurmd_uid = conf->slurmd_user_id;
  1475. slurm_conf_unlock();
  1476. }
  1477. return slurmd_uid;
  1478. }
  1479. /* slurm_get_root_filter
  1480. * RET uint16_t - Value of SchedulerRootFilter */
  1481. extern uint16_t slurm_get_root_filter(void)
  1482. {
  1483. uint16_t root_filter = 0;
  1484. slurm_ctl_conf_t *conf;
  1485. if (slurmdbd_conf) {
  1486. } else {
  1487. conf = slurm_conf_lock();
  1488. root_filter = conf->schedrootfltr;
  1489. slurm_conf_unlock();
  1490. }
  1491. return root_filter;
  1492. }
  1493. /* slurm_get_sched_params
  1494. * RET char * - Value of SchedulerParameters, MUST be xfreed by caller */
  1495. extern char *slurm_get_sched_params(void)
  1496. {
  1497. char *params = 0;
  1498. slurm_ctl_conf_t *conf;
  1499. if (slurmdbd_conf) {
  1500. } else {
  1501. conf = slurm_conf_lock();
  1502. params = xstrdup(conf->sched_params);
  1503. slurm_conf_unlock();
  1504. }
  1505. return params;
  1506. }
  1507. /* slurm_get_sched_port
  1508. * RET uint16_t - Value of SchedulerPort */
  1509. extern uint16_t slurm_get_sched_port(void)
  1510. {
  1511. uint16_t port = 0;
  1512. slurm_ctl_conf_t *conf;
  1513. if (slurmdbd_conf) {
  1514. } else {
  1515. conf = slurm_conf_lock();
  1516. port = conf->schedport;
  1517. slurm_conf_unlock();
  1518. }
  1519. return port;
  1520. }
  1521. /* slurm_get_sched_type
  1522. * get sched type from slurmctld_conf object
  1523. * RET char * - sched type, MUST be xfreed by caller
  1524. */
  1525. char *slurm_get_sched_type(void)
  1526. {
  1527. char *sched_type = NULL;
  1528. slurm_ctl_conf_t *conf;
  1529. if (slurmdbd_conf) {
  1530. } else {
  1531. conf = slurm_conf_lock();
  1532. sched_type = xstrdup(conf->schedtype);
  1533. slurm_conf_unlock();
  1534. }
  1535. return sched_type;
  1536. }
  1537. /* slurm_get_select_type
  1538. * get select_type from slurmctld_conf object
  1539. * RET char * - select_type, MUST be xfreed by caller
  1540. */
  1541. char *slurm_get_select_type(void)
  1542. {
  1543. char *select_type = NULL;
  1544. slurm_ctl_conf_t *conf;
  1545. if (slurmdbd_conf) {
  1546. } else {
  1547. conf = slurm_conf_lock();
  1548. select_type = xstrdup(conf->select_type);
  1549. slurm_conf_unlock();
  1550. }
  1551. return select_type;
  1552. }
  1553. /* slurm_get_select_type_param
  1554. * get select_type_param from slurmctld_conf object
  1555. * RET uint16_t - select_type_param
  1556. */
  1557. uint16_t slurm_get_select_type_param(void)
  1558. {
  1559. uint16_t select_type_param = 0;
  1560. slurm_ctl_conf_t *conf;
  1561. if (slurmdbd_conf) {
  1562. } else {
  1563. conf = slurm_conf_lock();
  1564. select_type_param = conf->select_type_param;
  1565. slurm_conf_unlock();
  1566. }
  1567. return select_type_param;
  1568. }
  1569. /** Return true if (remote) system runs Cray XT/XE */
  1570. bool is_cray_select_type(void)
  1571. {
  1572. bool result = false;
  1573. if (slurmdbd_conf) {
  1574. } else {
  1575. slurm_ctl_conf_t *conf = slurm_conf_lock();
  1576. result = strcasecmp(conf->select_type, "select/cray") == 0;
  1577. slurm_conf_unlock();
  1578. }
  1579. return result;
  1580. }
  1581. /* slurm_get_switch_type
  1582. * get switch type from slurmctld_conf object
  1583. * RET char * - switch type, MUST be xfreed by caller
  1584. */
  1585. char *slurm_get_switch_type(void)
  1586. {
  1587. char *switch_type = NULL;
  1588. slurm_ctl_conf_t *conf;
  1589. conf = slurm_conf_lock();
  1590. switch_type = xstrdup(conf->switch_type);
  1591. slurm_conf_unlock();
  1592. return switch_type;
  1593. }
  1594. /* slurm_get_wait_time
  1595. * returns wait_time from slurmctld_conf object
  1596. * RET uint16_t - wait_time
  1597. */
  1598. uint16_t slurm_get_wait_time(void)
  1599. {
  1600. uint16_t wait_time = 0;
  1601. slurm_ctl_conf_t *conf;
  1602. if (slurmdbd_conf) {
  1603. } else {
  1604. conf = slurm_conf_lock();
  1605. wait_time = conf->wait_time;
  1606. slurm_conf_unlock();
  1607. }
  1608. return wait_time;
  1609. }
  1610. /* slurm_get_srun_prolog
  1611. * return the name of the srun prolog program
  1612. * RET char * - name of prolog program, must be xfreed by caller
  1613. */
  1614. char *slurm_get_srun_prolog(void)
  1615. {
  1616. char *prolog = NULL;
  1617. slurm_ctl_conf_t *conf;
  1618. if (slurmdbd_conf) {
  1619. } else {
  1620. conf = slurm_conf_lock();
  1621. prolog = xstrdup(conf->srun_prolog);
  1622. slurm_conf_unlock();
  1623. }
  1624. return prolog;
  1625. }
  1626. /* slurm_get_srun_epilog
  1627. * return the name of the srun epilog program
  1628. * RET char * - name of epilog program, must be xfreed by caller
  1629. */
  1630. char *slurm_get_srun_epilog(void)
  1631. {
  1632. char *epilog = NULL;
  1633. slurm_ctl_conf_t *conf;
  1634. if (slurmdbd_conf) {
  1635. } else {
  1636. conf = slurm_conf_lock();
  1637. epilog = xstrdup(conf->srun_epilog);
  1638. slurm_conf_unlock();
  1639. }
  1640. return epilog;
  1641. }
  1642. /* slurm_get_task_epilog
  1643. * RET task_epilog name, must be xfreed by caller */
  1644. char *slurm_get_task_epilog(void)
  1645. {
  1646. char *task_epilog = NULL;
  1647. slurm_ctl_conf_t *conf;
  1648. if (slurmdbd_conf) {
  1649. } else {
  1650. conf = slurm_conf_lock();
  1651. task_epilog = xstrdup(conf->task_epilog);
  1652. slurm_conf_unlock();
  1653. }
  1654. return task_epilog;
  1655. }
  1656. /* slurm_get_task_prolog
  1657. * RET task_prolog name, must be xfreed by caller */
  1658. char *slurm_get_task_prolog(void)
  1659. {
  1660. char *task_prolog = NULL;
  1661. slurm_ctl_conf_t *conf;
  1662. if (slurmdbd_conf) {
  1663. } else {
  1664. conf = slurm_conf_lock();
  1665. task_prolog = xstrdup(conf->task_prolog);
  1666. slurm_conf_unlock();
  1667. }
  1668. return task_prolog;
  1669. }
  1670. /* slurm_get_task_plugin
  1671. * RET task_plugin name, must be xfreed by caller */
  1672. char *slurm_get_task_plugin(void)
  1673. {
  1674. char *task_plugin = NULL;
  1675. slurm_ctl_conf_t *conf;
  1676. conf = slurm_conf_lock();
  1677. task_plugin = xstrdup(conf->task_plugin);
  1678. slurm_conf_unlock();
  1679. return task_plugin;
  1680. }
  1681. /* slurm_get_task_plugin_param */
  1682. uint16_t slurm_get_task_plugin_param(void)
  1683. {
  1684. uint16_t task_plugin_param = 0;
  1685. slurm_ctl_conf_t *conf;
  1686. if (slurmdbd_conf) {
  1687. } else {
  1688. conf = slurm_conf_lock();
  1689. task_plugin_param = conf->task_plugin_param;
  1690. slurm_conf_unlock();
  1691. }
  1692. return task_plugin_param;
  1693. }
  1694. /* Change general slurm communication errors to slurmctld specific errors */
  1695. static void _remap_slurmctld_errno(void)
  1696. {
  1697. int err = slurm_get_errno();
  1698. if (err == SLURM_COMMUNICATIONS_CONNECTION_ERROR)
  1699. slurm_seterrno(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);
  1700. else if (err == SLURM_COMMUNICATIONS_SEND_ERROR)
  1701. slurm_seterrno(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
  1702. else if (err == SLURM_COMMUNICATIONS_RECEIVE_ERROR)
  1703. slurm_seterrno(SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR);
  1704. else if (err == SLURM_COMMUNICATIONS_SHUTDOWN_ERROR)
  1705. slurm_seterrno(SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR);
  1706. }
  1707. /**********************************************************************\
  1708. * general message management functions used by slurmctld, slurmd
  1709. \**********************************************************************/
  1710. /* In the socket implementation it creates a socket, binds to it, and
  1711. * listens for connections.
  1712. *
  1713. * IN port - port to bind the msg server to
  1714. * RET slurm_fd_t - file descriptor of the connection created
  1715. */
  1716. slurm_fd_t slurm_init_msg_engine_port(uint16_t port)
  1717. {
  1718. slurm_addr_t addr;
  1719. slurm_set_addr_any(&addr, port);
  1720. return _slurm_init_msg_engine(&addr);
  1721. }
  1722. /* In the socket implementation it creates a socket, binds to it, and
  1723. * listens for connections.
  1724. *
  1725. * IN addr_name - address to bind the msg server to (NULL means any)
  1726. * IN port - port to bind the msg server to
  1727. * RET slurm_fd_t - file descriptor of the connection created
  1728. */
  1729. slurm_fd_t slurm_init_msg_engine_addrname_port(char *addr_name, uint16_t port)
  1730. {
  1731. slurm_addr_t addr;
  1732. #ifdef BIND_SPECIFIC_ADDR
  1733. if (addr_name != NULL)
  1734. slurm_set_addr(&addr, port, addr_name);
  1735. else
  1736. slurm_set_addr_any(&addr, port);
  1737. #else
  1738. slurm_set_addr_any(&addr, port);
  1739. #endif
  1740. return _slurm_init_msg_engine(&addr);
  1741. }
  1742. /*
  1743. * Same as above, but initialize using a slurm address "addr"
  1744. *
  1745. * IN addr - slurm_addr_t to bind the msg server to
  1746. * RET slurm_fd_t - file descriptor of the connection created
  1747. */
  1748. slurm_fd_t slurm_init_msg_engine(slurm_addr_t *addr)
  1749. {
  1750. return _slurm_init_msg_engine(addr);
  1751. }
  1752. /*
  1753. * Close an established message engine.
  1754. * Returns SLURM_SUCCESS or SLURM_FAILURE.
  1755. *
  1756. * IN fd - an open file descriptor to close
  1757. * RET int - the return code
  1758. */
  1759. int slurm_shutdown_msg_engine(slurm_fd_t fd)
  1760. {
  1761. int rc = _slurm_close(fd);
  1762. if (rc)
  1763. slurm_seterrno(SLURM_COMMUNICATIONS_SHUTDOWN_ERROR);
  1764. return rc;
  1765. }
  1766. /*
  1767. * Close an established message connection.
  1768. * Returns SLURM_SUCCESS or SLURM_FAILURE.
  1769. *
  1770. * IN fd - an open file descriptor to close
  1771. * RET int - the return code
  1772. */
  1773. int slurm_shutdown_msg_conn(slurm_fd_t fd)
  1774. {
  1775. return _slurm_close(fd);
  1776. }
  1777. /**********************************************************************\
  1778. * msg connection establishment functions used by msg clients
  1779. \**********************************************************************/
  1780. /* In the bsd socket implementation it creates a SOCK_STREAM socket
  1781. * and calls connect on it a SOCK_DGRAM socket called with connect
  1782. * is defined to only receive messages from the address/port pair
  1783. * argument of the connect call slurm_address - for now it is
  1784. * really just a sockaddr_in
  1785. * IN slurm_address - slurm_addr_t of the connection destination
  1786. * RET slurm_fd - file descriptor of the connection created
  1787. */
  1788. slurm_fd_t slurm_open_msg_conn(slurm_addr_t * slurm_address)
  1789. {
  1790. return _slurm_open_msg_conn(slurm_address);
  1791. }
  1792. /* Calls connect to make a connection-less datagram connection to the
  1793. * primary or secondary slurmctld message engine. If the controller
  1794. * is very busy the connect may fail, so retry a couple of times.
  1795. * IN/OUT addr - address of controller contacted
  1796. * RET slurm_fd - file descriptor of the connection created
  1797. */
  1798. slurm_fd_t slurm_open_controller_conn(slurm_addr_t *addr)
  1799. {
  1800. slurm_fd_t fd = -1;
  1801. slurm_ctl_conf_t *conf;
  1802. slurm_protocol_config_t *myproto = NULL;
  1803. int retry, have_backup = 0;
  1804. if (!working_cluster_rec) {
  1805. /* This means the addr wasn't set up already.
  1806. */
  1807. if (slurm_api_set_default_config() < 0)
  1808. return SLURM_FAILURE;
  1809. myproto = xmalloc(sizeof(slurm_protocol_config_t));
  1810. memcpy(myproto, proto_conf, sizeof(slurm_protocol_config_t));
  1811. myproto->primary_controller.sin_port =
  1812. htons(slurmctld_conf.slurmctld_port +
  1813. (((time(NULL) + getpid()) %
  1814. slurmctld_conf.slurmctld_port_count)));
  1815. myproto->secondary_controller.sin_port =
  1816. myproto->primary_controller.sin_port;
  1817. }
  1818. for (retry=0; retry<slurm_get_msg_timeout(); retry++) {
  1819. if (retry)
  1820. sleep(1);
  1821. if (working_cluster_rec) {
  1822. if (working_cluster_rec->control_addr.sin_port == 0) {
  1823. slurm_set_addr(
  1824. &working_cluster_rec->control_addr,
  1825. working_cluster_rec->control_port,
  1826. working_cluster_rec->control_host);
  1827. }
  1828. addr = &working_cluster_rec->control_addr;
  1829. fd = slurm_open_msg_conn(addr);
  1830. if (fd >= 0)
  1831. goto end_it;
  1832. debug("Failed to contact controller: %m");
  1833. } else {
  1834. fd = slurm_open_msg_conn(&myproto->primary_controller);
  1835. if (fd >= 0)
  1836. goto end_it;
  1837. debug("Failed to contact primary controller: %m");
  1838. if (retry == 0) {
  1839. conf = slurm_conf_lock();
  1840. if (conf->backup_controller)
  1841. have_backup = 1;
  1842. slurm_conf_unlock();
  1843. }
  1844. if (have_backup) {
  1845. fd = slurm_open_msg_conn(&myproto->
  1846. secondary_controller);
  1847. if (fd >= 0)
  1848. goto end_it;
  1849. debug("Failed to contact secondary "
  1850. "controller: %m");
  1851. }
  1852. }
  1853. }
  1854. addr = NULL;
  1855. slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);
  1856. end_it:
  1857. xfree(myproto);
  1858. return fd;
  1859. }
  1860. /* calls connect to make a connection-less datagram connection to the
  1861. * primary or secondary slurmctld message engine
  1862. * RET slurm_fd_t - file descriptor of the connection created
  1863. * IN dest - controller to contact, primary or secondary
  1864. */
  1865. slurm_fd_t slurm_open_controller_conn_spec(enum controller_id dest)
  1866. {
  1867. slurm_addr_t *addr;
  1868. slurm_fd_t rc;
  1869. if (slurm_api_set_default_config() < 0) {
  1870. debug3("Error: Unable to set default config");
  1871. return SLURM_ERROR;
  1872. }
  1873. if (dest == PRIMARY_CONTROLLER)
  1874. addr = &proto_conf->primary_controller;
  1875. else { /* (dest == SECONDARY_CONTROLLER) */
  1876. slurm_ctl_conf_t *conf;
  1877. addr = NULL;
  1878. conf = slurm_conf_lock();
  1879. if (conf->backup_addr)
  1880. addr = &proto_conf->secondary_controller;
  1881. slurm_conf_unlock();
  1882. if (!addr)
  1883. return SLURM_ERROR;
  1884. }
  1885. rc = slurm_open_msg_conn(addr);
  1886. if (rc == -1)
  1887. _remap_slurmctld_errno();
  1888. return rc;
  1889. }
  1890. /* gets the slurm_addr_t of the specified controller
  1891. * primary or secondary slurmctld message engine
  1892. * IN dest - controller to contact, primary or secondary
  1893. * OUT addr - slurm_addr_t to the specified controller
  1894. */
  1895. void slurm_get_controller_addr_spec(enum controller_id dest, slurm_addr_t *addr)
  1896. {
  1897. addr = (dest == PRIMARY_CONTROLLER) ?
  1898. &proto_conf->primary_controller :
  1899. &proto_conf->secondary_controller;
  1900. }
  1901. /* In the bsd implmentation maps directly to a accept call
  1902. * IN open_fd - file descriptor to accept connection on
  1903. * OUT slurm_address - slurm_addr_t of the accepted connection
  1904. * RET slurm_fd - file descriptor of the connection created
  1905. */
  1906. slurm_fd_t slurm_accept_msg_conn(slurm_fd_t open_fd,
  1907. slurm_addr_t * slurm_address)
  1908. {
  1909. return _slurm_accept_msg_conn(open_fd, slurm_address);
  1910. }
  1911. /* In the bsd implmentation maps directly to a close call, to close
  1912. * the socket that was accepted
  1913. * IN open_fd - an open file descriptor to close
  1914. * RET int - the return code
  1915. */
  1916. int slurm_close_accepted_conn(slurm_fd_t open_fd)
  1917. {
  1918. return _slurm_close_accepted_conn(open_fd);
  1919. }
  1920. /**********************************************************************\
  1921. * receive message functions
  1922. \**********************************************************************/
  1923. /*
  1924. * NOTE: memory is allocated for the returned msg must be freed at
  1925. * some point using the slurm_free_functions.
  1926. * IN open_fd - file descriptor to receive msg on
  1927. * OUT msg - a slurm_msg struct to be filled in by the function
  1928. * IN timeout - how long to wait in milliseconds
  1929. * RET int - returns 0 on success, -1 on failure and sets errno
  1930. */
  1931. int slurm_receive_msg(slurm_fd_t fd, slurm_msg_t *msg, int timeout)
  1932. {
  1933. char *buf = NULL;
  1934. size_t buflen = 0;
  1935. header_t header;
  1936. int rc;
  1937. void *auth_cred = NULL;
  1938. Buf buffer;
  1939. xassert(fd >= 0);
  1940. slurm_msg_t_init(msg);
  1941. msg->conn_fd = fd;
  1942. if (timeout <= 0)
  1943. /* convert secs to msec */
  1944. timeout = slurm_get_msg_timeout() * 1000;
  1945. else if (timeout > (slurm_get_msg_timeout() * 10000)) {
  1946. debug("You are receiving a message with very long "
  1947. "timeout of %d seconds", (timeout/1000));
  1948. } else if (timeout < 1000) {
  1949. error("You are receiving a message with a very short "
  1950. "timeout of %d msecs", timeout);
  1951. }
  1952. /*
  1953. * Receive a msg. slurm_msg_recvfrom() will read the message
  1954. * length and allocate space on the heap for a buffer containing
  1955. * the message.
  1956. */
  1957. if (_slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
  1958. forward_init(&header.forward, NULL);
  1959. rc = errno;
  1960. goto total_return;
  1961. }
  1962. #if _DEBUG
  1963. _print_data (buf, buflen);
  1964. #endif
  1965. buffer = create_buf(buf, buflen);
  1966. if (unpack_header(&header, buffer) == SLURM_ERROR) {
  1967. free_buf(buffer);
  1968. rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
  1969. goto total_return;
  1970. }
  1971. if (check_header_version(&header) < 0) {
  1972. slurm_addr_t resp_addr;
  1973. char addr_str[32];
  1974. int uid = _unpack_msg_uid(buffer);
  1975. slurm_get_peer_addr(fd, &resp_addr);
  1976. slurm_print_slurm_addr(&resp_addr, addr_str, sizeof(addr_str));
  1977. error("Invalid Protocol Version %u from uid=%d at %s",
  1978. header.version, uid, addr_str);
  1979. free_buf(buffer);
  1980. rc = SLURM_PROTOCOL_VERSION_ERROR;
  1981. goto total_return;
  1982. }
  1983. //info("ret_cnt = %d",header.ret_cnt);
  1984. if (header.ret_cnt > 0) {
  1985. error("we received more than one message back use "
  1986. "slurm_receive_msgs instead");
  1987. header.ret_cnt = 0;
  1988. list_destroy(header.ret_list);
  1989. header.ret_list = NULL;
  1990. }
  1991. /* Forward message to other nodes */
  1992. if (header.forward.cnt > 0) {
  1993. error("We need to forward this to other nodes use "
  1994. "slurm_receive_msg_and_forward instead");
  1995. }
  1996. if ((auth_cred = g_slurm_auth_unpack(buffer)) == NULL) {
  1997. error( "authentication: %s ",
  1998. g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
  1999. free_buf(buffer);
  2000. rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
  2001. goto total_return;
  2002. }
  2003. if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
  2004. rc = g_slurm_auth_verify( auth_cred, NULL, 2,
  2005. _global_auth_key() );
  2006. } else
  2007. rc = g_slurm_auth_verify( auth_cred, NULL, 2, NULL );
  2008. if (rc != SLURM_SUCCESS) {
  2009. error( "authentication: %s ",
  2010. g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
  2011. (void) g_slurm_auth_destroy(auth_cred);
  2012. free_buf(buffer);
  2013. rc = SLURM_PROTOCOL_AUTHENTICATION_ERROR;
  2014. goto total_return;
  2015. }
  2016. /*
  2017. * Unpack message body
  2018. */
  2019. msg->protocol_version = header.version;
  2020. msg->msg_type = header.msg_type;
  2021. msg->flags = header.flags;
  2022. if ((header.body_length > remaining_buf(buffer)) ||
  2023. (unpack_msg(msg, buffer) != SLURM_SUCCESS)) {
  2024. rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
  2025. (void) g_slurm_auth_destroy(auth_cred);
  2026. free_buf(buffer);
  2027. goto total_return;
  2028. }
  2029. msg->auth_cred = (void *)auth_cred;
  2030. free_buf(buffer);
  2031. rc = SLURM_SUCCESS;
  2032. total_return:
  2033. destroy_forward(&header.forward);
  2034. slurm_seterrno(rc);
  2035. if (rc != SLURM_SUCCESS) {
  2036. msg->auth_cred = (void *) NULL;
  2037. error("slurm_receive_msg: %s", slurm_strerror(rc));
  2038. rc = -1;
  2039. } else {
  2040. rc = 0;
  2041. }
  2042. return rc;
  2043. }
/*
 * slurm_receive_msgs - receive one message on fd and return it, together
 *	with any responses carried in the message header's ret_list, as a
 *	list of ret_data_info_t records.
 *
 * NOTE: memory is allocated for the returned list
 *       and must be freed at some point using the list_destroy function.
 * IN  fd      - file descriptor to receive msg on
 * IN  steps   - how many steps down the tree we have to wait for
 * IN  timeout - how long to wait in milliseconds; a value <= 0 selects
 *               slurm_get_msg_timeout() converted to milliseconds
 * RET List    - List containing the responses of the children (if any) we
 *               forwarded the message to. List containing type
 *               (ret_data_info_t). NULL is returned when the receive
 *               fails before any list was obtained from the header.
 *               errno is set to the result code in every case.
 */
List slurm_receive_msgs(slurm_fd_t fd, int steps, int timeout)
{
	char *buf = NULL;
	size_t buflen = 0;
	header_t header;
	int rc;
	void *auth_cred = NULL;
	slurm_msg_t msg;
	Buf buffer;
	ret_data_info_t *ret_data_info = NULL;
	List ret_list = NULL;
	int orig_timeout = timeout;

	xassert(fd >= 0);

	slurm_msg_t_init(&msg);
	msg.conn_fd = fd;

	if (timeout <= 0) {
		/* convert secs to msec */
		timeout = slurm_get_msg_timeout() * 1000;
		orig_timeout = timeout;
	}
	if (steps) {
		/* lazily initialise the file-level message_timeout */
		if (message_timeout < 0)
			message_timeout = slurm_get_msg_timeout() * 1000;
		/* per-step share of the timeout; used only for the
		 * diagnostics below, the receive itself uses timeout */
		orig_timeout = (timeout -
				(message_timeout*(steps-1)))/steps;
		steps--;
	}

	debug4("orig_timeout was %d we have %d steps and a timeout of %d",
	       orig_timeout, steps, timeout);
	/* we compare to the orig_timeout here because that is really
	 * what we are going to wait for each step
	 */
	if (orig_timeout >= (slurm_get_msg_timeout() * 10000)) {
		debug("slurm_receive_msgs: "
		      "You are sending a message with timeout's greater "
		      "than %d seconds, your's is %d seconds",
		      (slurm_get_msg_timeout() * 10),
		      (timeout/1000));
	} else if (orig_timeout < 1000) {
		debug("slurm_receive_msgs: "
		      "You are sending a message with a very short timeout of "
		      "%d milliseconds each step in the tree has %d "
		      "milliseconds", timeout, orig_timeout);
	}

	/*
	 * Receive a msg. slurm_msg_recvfrom() will read the message
	 * length and allocate space on the heap for a buffer containing
	 * the message.
	 */
	if (_slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
		/* header was never unpacked: give header.forward a defined
		 * state because total_return calls destroy_forward() on it */
		forward_init(&header.forward, NULL);
		rc = errno;
		goto total_return;
	}

#if	_DEBUG
	_print_data (buf, buflen);
#endif
	/* from here on the received data is released via free_buf(buffer) */
	buffer = create_buf(buf, buflen);

	if (unpack_header(&header, buffer) == SLURM_ERROR) {
		free_buf(buffer);
		rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
		goto total_return;
	}

	if (check_header_version(&header) < 0) {
		slurm_addr_t resp_addr;
		char addr_str[32];
		/* best effort: identify the sender for the log message */
		int uid = _unpack_msg_uid(buffer);
		slurm_get_peer_addr(fd, &resp_addr);
		slurm_print_slurm_addr(&resp_addr, addr_str, sizeof(addr_str));
		error("Invalid Protocol Version %u from uid=%d at %s",
		      header.version, uid, addr_str);
		free_buf(buffer);
		rc = SLURM_PROTOCOL_VERSION_ERROR;
		goto total_return;
	}
	//info("ret_cnt = %d",header.ret_cnt);
	/* Take over the header's list of responses (or start an empty one)
	 * and detach it from the header so it is not referenced twice. */
	if (header.ret_cnt > 0) {
		if (header.ret_list)
			ret_list = header.ret_list;
		else
			ret_list = list_create(destroy_data_info);
		header.ret_cnt = 0;
		header.ret_list = NULL;
	}

	/* Forward message to other nodes */
	/* (not supported by this entry point; the condition is only logged) */
	if (header.forward.cnt > 0) {
		error("We need to forward this to other nodes use "
		      "slurm_receive_msg_and_forward instead");
	}

	if ((auth_cred = g_slurm_auth_unpack(buffer)) == NULL) {
		error( "authentication: %s ",
		       g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
		free_buf(buffer);
		rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
		goto total_return;
	}
	/* verify with the global key only when the sender flagged its use */
	if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
		rc = g_slurm_auth_verify( auth_cred, NULL, 2,
					  _global_auth_key() );
	} else
		rc = g_slurm_auth_verify( auth_cred, NULL, 2, NULL );

	if (rc != SLURM_SUCCESS) {
		error("authentication: %s ",
		      g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
		(void) g_slurm_auth_destroy(auth_cred);
		free_buf(buffer);
		rc = SLURM_PROTOCOL_AUTHENTICATION_ERROR;
		goto total_return;
	}

	/*
	 * Unpack message body
	 */
	msg.protocol_version = header.version;
	msg.msg_type = header.msg_type;
	msg.flags = header.flags;

	/* reject a body length larger than what was actually received */
	if ((header.body_length > remaining_buf(buffer)) ||
	    (unpack_msg(&msg, buffer) != SLURM_SUCCESS)) {
		(void) g_slurm_auth_destroy(auth_cred);
		free_buf(buffer);
		rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
		goto total_return;
	}
	/* the credential is not returned to the caller, so release it */
	g_slurm_auth_destroy(auth_cred);

	free_buf(buffer);
	rc = SLURM_SUCCESS;

total_return:
	destroy_forward(&header.forward);

	if (rc != SLURM_SUCCESS) {
		/* A failure is recorded in the list only if one was already
		 * obtained from the header; otherwise NULL is returned. */
		if (ret_list) {
			ret_data_info = xmalloc(sizeof(ret_data_info_t));
			ret_data_info->err = rc;
			ret_data_info->type = RESPONSE_FORWARD_FAILED;
			ret_data_info->data = NULL;
			list_push(ret_list, ret_data_info);
		}
		error("slurm_receive_msgs: %s", slurm_strerror(rc));
	} else {
		/* Success: add this node's own response to the list */
		if (!ret_list)
			ret_list = list_create(destroy_data_info);
		ret_data_info = xmalloc(sizeof(ret_data_info_t));
		ret_data_info->err = rc;
		ret_data_info->node_name = NULL;
		ret_data_info->type = msg.msg_type;
		ret_data_info->data = msg.data;
		list_push(ret_list, ret_data_info);
	}

	errno = rc;
	return ret_list;
}
  2203. /* try to determine the UID associated with a message with different
  2204. * message header version, return -1 if we can't tell */
  2205. static int _unpack_msg_uid(Buf buffer)
  2206. {
  2207. int uid = -1;
  2208. void *auth_cred = NULL;
  2209. if ((auth_cred = g_slurm_auth_unpack(buffer)) == NULL)
  2210. return uid;
  2211. uid = (int) g_slurm_auth_get_uid(auth_cred, NULL);
  2212. g_slurm_auth_destroy(auth_cred);
  2213. return uid;
  2214. }
  2215. /*
  2216. * NOTE: memory is allocated for the returned msg and the returned list
  2217. * both must be freed at some point using the slurm_free_functions
  2218. * and list_destroy function.
  2219. * IN open_fd - file descriptor to receive msg on
  2220. * IN/OUT msg - a slurm_msg struct to be filled in by the function
  2221. * we use the orig_addr from this var for forwarding.
  2222. * IN timeout - how long to wait in milliseconds
  2223. * RET int - returns 0 on success, -1 on failure and sets errno
  2224. */
  2225. int slurm_receive_msg_and_forward(slurm_fd_t fd, slurm_addr_t *orig_addr,
  2226. slurm_msg_t *msg, int timeout)
  2227. {
  2228. char *buf = NULL;
  2229. size_t buflen = 0;
  2230. header_t header;
  2231. int rc;
  2232. void *auth_cred = NULL;
  2233. Buf buffer;
  2234. xassert(fd >= 0);
  2235. if (msg->forward.init != FORWARD_INIT)
  2236. slurm_msg_t_init(msg);
  2237. /* set msg connection fd to accepted fd. This allows
  2238. * possibility for slurmd_req () to close accepted connection
  2239. */
  2240. msg->conn_fd = fd;
  2241. /* this always is the connection */
  2242. memcpy(&msg->address, orig_addr, sizeof(slurm_addr_t));
  2243. /* where the connection originated from, this
  2244. * might change based on the header we receive */
  2245. memcpy(&msg->orig_addr, orig_addr, sizeof(slurm_addr_t));
  2246. msg->ret_list = list_create(destroy_data_info);
  2247. if (timeout <= 0)
  2248. /* convert secs to msec */
  2249. timeout = slurm_get_msg_timeout() * 1000;
  2250. if (timeout >= (slurm_get_msg_timeout() * 10000)) {
  2251. debug("slurm_receive_msg_and_forward: "
  2252. "You are sending a message with timeout's greater "
  2253. "than %d seconds, your's is %d seconds",
  2254. (slurm_get_msg_timeout() * 10),
  2255. (timeout/1000));
  2256. } else if (timeout < 1000) {
  2257. debug("slurm_receive_msg_and_forward: "
  2258. "You are sending a message with a very short timeout of "
  2259. "%d milliseconds", timeout);
  2260. }
  2261. /*
  2262. * Receive a msg. slurm_msg_recvfrom() will read the message
  2263. * length and allocate space on the heap for a buffer containing
  2264. * the message.
  2265. */
  2266. if (_slurm_msg_recvfrom_timeout(fd, &buf, &buflen, 0, timeout) < 0) {
  2267. forward_init(&header.forward, NULL);
  2268. rc = errno;
  2269. goto total_return;
  2270. }
  2271. #if _DEBUG
  2272. _print_data (buf, buflen);
  2273. #endif
  2274. buffer = create_buf(buf, buflen);
  2275. if (unpack_header(&header, buffer) == SLURM_ERROR) {
  2276. free_buf(buffer);
  2277. rc = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
  2278. goto total_return;
  2279. }
  2280. if (check_header_version(&header) < 0) {
  2281. slurm_addr_t resp_addr;
  2282. char addr_str[32];
  2283. int uid = _unpack_msg_uid(buffer);
  2284. slurm_get_peer_addr(fd, &resp_addr);
  2285. slurm_print_slurm_addr(&resp_addr, addr_str, sizeof(addr_str));
  2286. error("Invalid Protocol Version %u from uid=%d at %s",
  2287. header.version, uid, addr_str);
  2288. free_buf(buffer);
  2289. rc = SLURM_PROTOCOL_VERSION_ERROR;
  2290. goto total_return;
  2291. }
  2292. if (header.ret_cnt > 0) {
  2293. error("we received more than one message back use "
  2294. "slurm_receive_msgs instead");
  2295. header.ret_cnt = 0;
  2296. list_destroy(header.ret_list);
  2297. header.ret_list = NULL;
  2298. }
  2299. //info("ret_cnt = %d",header.ret_cnt);
  2300. /* if (header.ret_cnt > 0) { */
  2301. /* while ((ret_data_info = list_pop(header.ret_list))) */
  2302. /* list_push(msg->ret_list, ret_data_info); */
  2303. /* header.ret_cnt = 0; */
  2304. /* list_destroy(header.ret_list); */
  2305. /* header.ret_list = NULL; */
  2306. /* } */
  2307. /*
  2308. * header.orig_addr will be set to where the first message
  2309. * came from if this is a forward else we set the
  2310. * header.orig_addr to our addr just incase we need to send it off.
  2311. */
  2312. if (header.orig_addr.sin_addr.s_addr != 0) {
  2313. memcpy(&msg->orig_addr, &header.orig_addr, sizeof(slurm_addr_t));
  2314. } else {
  2315. memcpy(&header.orig_addr, orig_addr, sizeof(slurm_addr_t));
  2316. }
  2317. /* Forward message to other nodes */
  2318. if (header.forward.cnt > 0) {
  2319. debug("forwarding to %u", header.forward.cnt);
  2320. msg->forward_struct = xmalloc(sizeof(forward_struct_t));
  2321. slurm_mutex_init(&msg->forward_struct->forward_mutex);
  2322. pthread_cond_init(&msg->forward_struct->notify, NULL);
  2323. msg->forward_struct->forward_msg =
  2324. xmalloc(sizeof(forward_msg_t) * header.forward.cnt);
  2325. msg->forward_struct->buf_len = remaining_buf(buffer);
  2326. msg->forward_struct->buf =
  2327. xmalloc(sizeof(char) * msg->forward_struct->buf_len);
  2328. memcpy(msg->forward_struct->buf,
  2329. &buffer->head[buffer->processed],
  2330. msg->forward_struct->buf_len);
  2331. msg->forward_struct->ret_list = msg->ret_list;
  2332. /* take out the amount of timeout from this hop */
  2333. msg->forward_struct->timeout = header.forward.timeout;
  2334. if (msg->forward_struct->timeout <= 0)
  2335. msg->forward_struct->timeout = message_timeout;
  2336. msg->forward_struct->fwd_cnt = header.forward.cnt;
  2337. debug3("forwarding messages to %u nodes with timeout of %d",
  2338. msg->forward_struct->fwd_cnt,
  2339. msg->forward_struct->timeout);
  2340. if (forward_msg(msg->forward_struct, &header) == SLURM_ERROR) {
  2341. error("problem with forward msg");
  2342. }
  2343. }
  2344. if ((auth_cred = g_slurm_auth_unpack(buffer)) == NULL) {
  2345. error( "authentication: %s ",
  2346. g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
  2347. free_buf(buffer);
  2348. rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
  2349. goto total_return;
  2350. }
  2351. if (header.flags & SLURM_GLOBAL_AUTH_KEY) {
  2352. rc = g_slurm_auth_verify( auth_cred, NULL, 2,
  2353. _global_auth_key() );
  2354. } else
  2355. rc = g_slurm_auth_verify( auth_cred, NULL, 2, NULL );
  2356. if (rc != SLURM_SUCCESS) {
  2357. error( "authentication: %s ",
  2358. g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
  2359. (void) g_slurm_auth_destroy(auth_cred);
  2360. free_buf(buffer);
  2361. rc = SLURM_PROTOCOL_AUTHENTICATION_ERROR;
  2362. goto total_return;
  2363. }
  2364. /*
  2365. * Unpack message body
  2366. */
  2367. msg->protocol_version = header.version;
  2368. msg->msg_type = header.msg_type;
  2369. msg->flags = header.flags;
  2370. if ( (header.body_length > remaining_buf(buffer)) ||
  2371. (unpack_msg(msg, buffer) != SLURM_SUCCESS) ) {
  2372. (void) g_slurm_auth_destroy(auth_cred);
  2373. free_buf(buffer);
  2374. rc = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
  2375. goto total_return;
  2376. }
  2377. msg->auth_cred = (void *) auth_cred;
  2378. free_buf(buffer);
  2379. rc = SLURM_SUCCESS;
  2380. total_return:
  2381. destroy_forward(&header.forward);
  2382. slurm_seterrno(rc);
  2383. if (rc != SLURM_SUCCESS) {
  2384. msg->msg_type = RESPONSE_FORWARD_FAILED;
  2385. msg->auth_cred = (void *) NULL;
  2386. msg->data = NULL;
  2387. error("slurm_receive_msg_and_forward: %s",
  2388. slurm_strerror(rc));
  2389. } else {
  2390. rc = 0;
  2391. }
  2392. return rc;
  2393. }
  2394. /**********************************************************************\
  2395. * send message functions
  2396. \**********************************************************************/
  2397. /*
  2398. * Do the wonderful stuff that needs be done to pack msg
  2399. * and hdr into buffer
  2400. */
  2401. static void
  2402. _pack_msg(slurm_msg_t *msg, header_t *hdr, Buf buffer)
  2403. {
  2404. unsigned int tmplen, msglen;
  2405. tmplen = get_buf_offset(buffer);
  2406. pack_msg(msg, buffer);
  2407. msglen = get_buf_offset(buffer) - tmplen;
  2408. /* update header with correct cred and msg lengths */
  2409. update_header(hdr, msglen);
  2410. /* repack updated header */
  2411. tmplen = get_buf_offset(buffer);
  2412. set_buf_offset(buffer, 0);
  2413. pack_header(hdr, buffer);
  2414. set_buf_offset(buffer, tmplen);
  2415. }
  2416. /*
  2417. * Send a slurm message over an open file descriptor `fd'
  2418. * Returns the size of the message sent in bytes, or -1 on failure.
  2419. */
  2420. int slurm_send_node_msg(slurm_fd_t fd, slurm_msg_t * msg)
  2421. {
  2422. header_t header;
  2423. Buf buffer;
  2424. int rc;
  2425. void * auth_cred;
  2426. /*
  2427. * Initialize header with Auth credential and message type.
  2428. */
  2429. if (msg->flags & SLURM_GLOBAL_AUTH_KEY)
  2430. auth_cred = g_slurm_auth_create(NULL, 2, _global_auth_key());
  2431. else
  2432. auth_cred = g_slurm_auth_create(NULL, 2, NULL);
  2433. if (auth_cred == NULL) {
  2434. error("authentication: %s",
  2435. g_slurm_auth_errstr(g_slurm_auth_errno(NULL)) );
  2436. slurm_seterrno_ret(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
  2437. }
  2438. if (msg->forward.init != FORWARD_INIT) {
  2439. forward_init(&msg->forward, NULL);
  2440. msg->ret_list = NULL;
  2441. }
  2442. forward_wait(msg);
  2443. init_header(&header, msg, msg->flags);
  2444. /*
  2445. * Pack header into buffer for transmission
  2446. */
  2447. buffer = init_buf(BUF_SIZE);
  2448. pack_header(&header, buffer);
  2449. /*
  2450. * Pack auth credential
  2451. */
  2452. rc = g_slurm_auth_pack(auth_cred, buffer);
  2453. (void) g_slurm_auth_destroy(auth_cred);
  2454. if (rc) {
  2455. error("authentication: %s",
  2456. g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
  2457. free_buf(buffer);
  2458. slurm_seterrno_ret(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
  2459. }
  2460. /*
  2461. * Pack message into buffer
  2462. */
  2463. _pack_msg(msg, &header, buffer);
  2464. #if _DEBUG
  2465. _print_data (get_buf_data(buffer),get_buf_offset(buffer));
  2466. #endif
  2467. /*
  2468. * Send message
  2469. */
  2470. rc = _slurm_msg_sendto( fd, get_buf_data(buffer),
  2471. get_buf_offset(buffer),
  2472. SLURM_PROTOCOL_NO_SEND_RECV_FLAGS );
  2473. if ((rc < 0) && (errno == ENOTCONN)) {
  2474. debug3("slurm_msg_sendto: peer has disappeared for msg_type=%u",
  2475. msg->msg_type);
  2476. } else if (rc < 0) {
  2477. slurm_addr_t peer_addr;
  2478. char addr_str[32];
  2479. slurm_get_peer_addr(fd, &peer_addr);
  2480. slurm_print_slurm_addr(&peer_addr, addr_str, sizeof(addr_str));
  2481. error("slurm_msg_sendto: address:port=%s msg_type=%u: %m",
  2482. addr_str, msg->msg_type);
  2483. }
  2484. free_buf(buffer);
  2485. return rc;
  2486. }
  2487. /**********************************************************************\
  2488. * stream functions
  2489. \**********************************************************************/
  2490. /* slurm_listen_stream
  2491. * opens a stream server and listens on it
  2492. * IN slurm_address - slurm_addr_t to bind the server stream to
  2493. * RET slurm_fd - file descriptor of the stream created
  2494. */
  2495. slurm_fd_t slurm_listen_stream(slurm_addr_t * slurm_address)
  2496. {
  2497. return _slurm_listen_stream(slurm_address);
  2498. }
  2499. /* slurm_accept_stream
  2500. * accepts a incoming stream connection on a stream server slurm_fd
  2501. * IN open_fd - file descriptor to accept connection on
  2502. * OUT slurm_address - slurm_addr_t of the accepted connection
  2503. * RET slurm_fd - file descriptor of the accepted connection
  2504. */
  2505. slurm_fd_t slurm_accept_stream(slurm_fd_t open_fd, slurm_addr_t * slurm_address)
  2506. {
  2507. return _slurm_accept_stream(open_fd, slurm_address);
  2508. }
  2509. /* slurm_open_stream
  2510. * opens a client connection to stream server
  2511. * IN slurm_address - slurm_addr_t of the connection destination
  2512. * RET slurm_fd - file descriptor of the connection created
  2513. * NOTE: Retry with various ports as needed if connection is refused
  2514. */
  2515. slurm_fd_t slurm_open_stream(slurm_addr_t * slurm_address)
  2516. {
  2517. return _slurm_open_stream(slurm_address, true);
  2518. }
  2519. /* slurm_write_stream
  2520. * writes a buffer out a stream file descriptor
  2521. * IN open_fd - file descriptor to write on
  2522. * IN buffer - buffer to send
  2523. * IN size - size of buffer send
  2524. * IN timeout - how long to wait in milliseconds
  2525. * RET size_t - bytes sent , or -1 on errror
  2526. */
  2527. size_t slurm_write_stream(slurm_fd_t open_fd, char *buffer, size_t size)
  2528. {
  2529. return _slurm_send_timeout(open_fd, buffer, size,
  2530. SLURM_PROTOCOL_NO_SEND_RECV_FLAGS,
  2531. (slurm_get_msg_timeout() * 1000));
  2532. }
  2533. size_t slurm_write_stream_timeout(slurm_fd_t open_fd, char *buffer,
  2534. size_t size, int timeout)
  2535. {
  2536. return _slurm_send_timeout(open_fd, buffer, size,
  2537. SLURM_PROTOCOL_NO_SEND_RECV_FLAGS,
  2538. timeout);
  2539. }
  2540. /* slurm_read_stream
  2541. * read into buffer grom a stream file descriptor
  2542. * IN open_fd - file descriptor to read from
  2543. * OUT buffer - buffer to receive into
  2544. * IN size - size of buffer
  2545. * IN timeout - how long to wait in milliseconds
  2546. * RET size_t - bytes read , or -1 on errror
  2547. */
  2548. size_t slurm_read_stream(slurm_fd_t open_fd, char *buffer, size_t size)
  2549. {
  2550. return _slurm_recv_timeout(open_fd, buffer, size,
  2551. SLURM_PROTOCOL_NO_SEND_RECV_FLAGS,
  2552. (slurm_get_msg_timeout() * 1000));
  2553. }
  2554. size_t slurm_read_stream_timeout(slurm_fd_t open_fd, char *buffer,
  2555. size_t size, int timeout)
  2556. {
  2557. return _slurm_recv_timeout(open_fd, buffer, size,
  2558. SLURM_PROTOCOL_NO_SEND_RECV_FLAGS,
  2559. timeout);
  2560. }
  2561. /* slurm_get_stream_addr
  2562. * esentially a encapsilated get_sockname
  2563. * IN open_fd - file descriptor to retreive slurm_addr_t for
  2564. * OUT address - address that open_fd to bound to
  2565. */
  2566. int slurm_get_stream_addr(slurm_fd_t open_fd, slurm_addr_t * address)
  2567. {
  2568. return _slurm_get_stream_addr(open_fd, address);
  2569. }
  2570. /* slurm_close_stream
  2571. * closes either a server or client stream file_descriptor
  2572. * IN open_fd - an open file descriptor to close
  2573. * RET int - the return code
  2574. */
  2575. int slurm_close_stream(slurm_fd_t open_fd)
  2576. {
  2577. return _slurm_close_stream(open_fd);
  2578. }
  2579. /* make an open slurm connection blocking or non-blocking
  2580. * (i.e. wait or do not wait for i/o completion )
  2581. * IN open_fd - an open file descriptor to change the effect
  2582. * RET int - the return code
  2583. */
  2584. int slurm_set_stream_non_blocking(slurm_fd_t open_fd)
  2585. {
  2586. return _slurm_set_stream_non_blocking(open_fd);
  2587. }
  2588. int slurm_set_stream_blocking(slurm_fd_t open_fd)
  2589. {
  2590. return _slurm_set_stream_blocking(open_fd);
  2591. }
  2592. /**********************************************************************\
  2593. * address conversion and management functions
  2594. \**********************************************************************/
  2595. /* slurm_set_addr_uint
  2596. * initializes the slurm_address with the supplied port and ip_address
  2597. * OUT slurm_address - slurm_addr_t to be filled in
  2598. * IN port - port in host order
  2599. * IN ip_address - ipv4 address in uint32 host order form
  2600. */
  2601. void slurm_set_addr_uint(slurm_addr_t * slurm_address, uint16_t port,
  2602. uint32_t ip_address)
  2603. {
  2604. _slurm_set_addr_uint(slurm_address, port, ip_address);
  2605. }
  2606. /* slurm_set_addr_any
  2607. * initialized the slurm_address with the supplied port on INADDR_ANY
  2608. * OUT slurm_address - slurm_addr_t to be filled in
  2609. * IN port - port in host order
  2610. */
  2611. void slurm_set_addr_any(slurm_addr_t * slurm_address, uint16_t port)
  2612. {
  2613. _slurm_set_addr_uint(slurm_address, port, SLURM_INADDR_ANY);
  2614. }
  2615. /* slurm_set_addr
  2616. * initializes the slurm_address with the supplied port and host name
  2617. * OUT slurm_address - slurm_addr_t to be filled in
  2618. * IN port - port in host order
  2619. * IN host - hostname or dns name
  2620. */
  2621. void slurm_set_addr(slurm_addr_t * slurm_address, uint16_t port, char *host)
  2622. {
  2623. _slurm_set_addr_char(slurm_address, port, host);
  2624. }
  2625. /* reset_slurm_addr
  2626. * resets the address field of a slurm_addr, port and family unchanged
  2627. * OUT slurm_address - slurm_addr_t to be reset in
  2628. * IN new_address - source of address to write into slurm_address
  2629. */
  2630. void reset_slurm_addr(slurm_addr_t * slurm_address, slurm_addr_t new_address)
  2631. {
  2632. _reset_slurm_addr(slurm_address, new_address);
  2633. }
  2634. /* slurm_set_addr_char
  2635. * initializes the slurm_address with the supplied port and host
  2636. * OUT slurm_address - slurm_addr_t to be filled in
  2637. * IN port - port in host order
  2638. * IN host - hostname or dns name
  2639. */
  2640. void slurm_set_addr_char(slurm_addr_t * slurm_address, uint16_t port,
  2641. char *host)
  2642. {
  2643. _slurm_set_addr_char(slurm_address, port, host);
  2644. }
  2645. /* slurm_get_addr
  2646. * given a slurm_address it returns its port and hostname
  2647. * IN slurm_address - slurm_addr_t to be queried
  2648. * OUT port - port number
  2649. * OUT host - hostname
  2650. * IN buf_len - length of hostname buffer
  2651. */
  2652. void slurm_get_addr(slurm_addr_t * slurm_address, uint16_t * port,
  2653. char *host, unsigned int buf_len)
  2654. {
  2655. _slurm_get_addr(slurm_address, port, host, buf_len);
  2656. }
  2657. /* slurm_get_ip_str
  2658. * given a slurm_address it returns its port and ip address string
  2659. * IN slurm_address - slurm_addr_t to be queried
  2660. * OUT port - port number
  2661. * OUT ip - ip address in dotted-quad string form
  2662. * IN buf_len - length of ip buffer
  2663. */
  2664. void slurm_get_ip_str(slurm_addr_t * slurm_address, uint16_t * port,
  2665. char *ip, unsigned int buf_len)
  2666. {
  2667. unsigned char *uc = (unsigned char *)&slurm_address->sin_addr.s_addr;
  2668. *port = slurm_address->sin_port;
  2669. snprintf(ip, buf_len, "%u.%u.%u.%u", uc[0], uc[1], uc[2], uc[3]);
  2670. }
  2671. /* slurm_get_peer_addr
  2672. * get the slurm address of the peer connection, similar to getpeeraddr
  2673. * IN fd - an open connection
  2674. * OUT slurm_address - place to park the peer's slurm_addr
  2675. */
  2676. int slurm_get_peer_addr(slurm_fd_t fd, slurm_addr_t * slurm_address)
  2677. {
  2678. struct sockaddr name;
  2679. socklen_t namelen = (socklen_t) sizeof(struct sockaddr);
  2680. int rc;
  2681. if ((rc = _slurm_getpeername((int) fd, &name, &namelen)))
  2682. return rc;
  2683. memcpy(slurm_address, &name, sizeof(slurm_addr_t));
  2684. return 0;
  2685. }
  2686. /* slurm_print_slurm_addr
  2687. * prints a slurm_addr_t into a buf
  2688. * IN address - slurm_addr_t to print
  2689. * IN buf - space for string representation of slurm_addr
  2690. * IN n - max number of bytes to write (including NUL)
  2691. */
  2692. void slurm_print_slurm_addr(slurm_addr_t * address, char *buf, size_t n)
  2693. {
  2694. _slurm_print_slurm_addr(address, buf, n);
  2695. }
  2696. /**********************************************************************\
  2697. * slurm_addr_t pack routines
  2698. \**********************************************************************/
  2699. /* slurm_pack_slurm_addr
  2700. * packs a slurm_addr_t into a buffer to serialization transport
  2701. * IN slurm_address - slurm_addr_t to pack
  2702. * IN/OUT buffer - buffer to pack the slurm_addr_t into
  2703. */
  2704. void slurm_pack_slurm_addr(slurm_addr_t * slurm_address, Buf buffer)
  2705. {
  2706. _slurm_pack_slurm_addr(slurm_address, buffer);
  2707. }
  2708. /* slurm_unpack_slurm_addr
  2709. * unpacks a buffer into a slurm_addr_t after serialization transport
  2710. * OUT slurm_address - slurm_addr_t to unpack to
  2711. * IN/OUT buffer - buffer to unpack the slurm_addr_t from
  2712. * returns - SLURM error code
  2713. */
  2714. int slurm_unpack_slurm_addr_no_alloc(slurm_addr_t * slurm_address,
  2715. Buf buffer)
  2716. {
  2717. return _slurm_unpack_slurm_addr_no_alloc(slurm_address, buffer);
  2718. }
  2719. /* slurm_pack_slurm_addr_array
  2720. * packs an array of slurm_addrs into a buffer
  2721. * OUT slurm_address - slurm_addr_t to pack
  2722. * IN size_val - how many to pack
  2723. * IN/OUT buffer - buffer to pack the slurm_addr_t from
  2724. * returns - SLURM error code
  2725. */
  2726. void slurm_pack_slurm_addr_array(slurm_addr_t * slurm_address,
  2727. uint32_t size_val, Buf buffer)
  2728. {
  2729. int i = 0;
  2730. uint32_t nl = htonl(size_val);
  2731. pack32(nl, buffer);
  2732. for (i = 0; i < size_val; i++) {
  2733. slurm_pack_slurm_addr(slurm_address + i, buffer);
  2734. }
  2735. }
  2736. /* slurm_unpack_slurm_addr_array
  2737. * unpacks an array of slurm_addrs from a buffer
  2738. * OUT slurm_address - slurm_addr_t to unpack to
  2739. * IN size_val - how many to unpack
  2740. * IN/OUT buffer - buffer to upack the slurm_addr_t from
  2741. * returns - SLURM error code
  2742. */
  2743. int slurm_unpack_slurm_addr_array(slurm_addr_t ** slurm_address,
  2744. uint32_t * size_val, Buf buffer)
  2745. {
  2746. int i = 0;
  2747. uint32_t nl;
  2748. *slurm_address = NULL;
  2749. safe_unpack32(&nl, buffer);
  2750. *size_val = ntohl(nl);
  2751. *slurm_address = xmalloc((*size_val) * sizeof(slurm_addr_t));
  2752. for (i = 0; i < *size_val; i++) {
  2753. if (slurm_unpack_slurm_addr_no_alloc((*slurm_address) + i,
  2754. buffer))
  2755. goto unpack_error;
  2756. }
  2757. return SLURM_SUCCESS;
  2758. unpack_error:
  2759. xfree(*slurm_address);
  2760. *slurm_address = NULL;
  2761. return SLURM_ERROR;
  2762. }
  2763. /**********************************************************************\
  2764. * simplified communication routines
  2765. * They open a connection do work then close the connection all within
  2766. * the function
  2767. \**********************************************************************/
  2768. /* slurm_send_rc_msg
  2769. * given the original request message this function sends a
  2770. * slurm_return_code message back to the client that made the request
  2771. * IN request_msg - slurm_msg the request msg
  2772. * IN rc - the return_code to send back to the client
  2773. */
  2774. int slurm_send_rc_msg(slurm_msg_t *msg, int rc)
  2775. {
  2776. slurm_msg_t resp_msg;
  2777. return_code_msg_t rc_msg;
  2778. if (msg->conn_fd < 0) {
  2779. slurm_seterrno(ENOTCONN);
  2780. return SLURM_ERROR;
  2781. }
  2782. rc_msg.return_code = rc;
  2783. slurm_msg_t_init(&resp_msg);
  2784. resp_msg.protocol_version = msg->protocol_version;
  2785. resp_msg.address = msg->address;
  2786. resp_msg.msg_type = RESPONSE_SLURM_RC;
  2787. resp_msg.data = &rc_msg;
  2788. resp_msg.flags = msg->flags;
  2789. resp_msg.forward = msg->forward;
  2790. resp_msg.forward_struct = msg->forward_struct;
  2791. resp_msg.ret_list = msg->ret_list;
  2792. resp_msg.orig_addr = msg->orig_addr;
  2793. /* send message */
  2794. return slurm_send_node_msg(msg->conn_fd, &resp_msg);
  2795. }
  2796. /*
  2797. * Send and recv a slurm request and response on the open slurm descriptor
  2798. * IN fd - file descriptor to receive msg on
  2799. * IN req - a slurm_msg struct to be sent by the function
  2800. * OUT resp - a slurm_msg struct to be filled in by the function
  2801. * IN timeout - how long to wait in milliseconds
  2802. * RET int - returns 0 on success, -1 on failure and sets errno
  2803. */
  2804. static int
  2805. _send_and_recv_msg(slurm_fd_t fd, slurm_msg_t *req,
  2806. slurm_msg_t *resp, int timeout)
  2807. {
  2808. int retry = 0;
  2809. int rc = -1;
  2810. slurm_msg_t_init(resp);
  2811. if (slurm_send_node_msg(fd, req) >= 0) {
  2812. /* no need to adjust and timeouts here since we are not
  2813. forwarding or expecting anything other than 1 message
  2814. and the regular timeout will be altered in
  2815. slurm_receive_msg if it is 0 */
  2816. rc = slurm_receive_msg(fd, resp, timeout);
  2817. }
  2818. /*
  2819. * Attempt to close an open connection
  2820. */
  2821. while ((slurm_shutdown_msg_conn(fd) < 0) && (errno == EINTR) ) {
  2822. if (retry++ > MAX_SHUTDOWN_RETRY) {
  2823. break;
  2824. }
  2825. }
  2826. return rc;
  2827. }
  2828. /*
  2829. * Send and recv a slurm request and response on the open slurm descriptor
  2830. * with a list containing the responses of the children (if any) we
  2831. * forwarded the message to. List containing type (ret_data_info_t).
  2832. * IN fd - file descriptor to receive msg on
  2833. * IN req - a slurm_msg struct to be sent by the function
  2834. * IN timeout - how long to wait in milliseconds
  2835. * RET List - List containing the responses of the childern (if any) we
  2836. * forwarded the message to. List containing type
  2837. * (ret_data_info_t).
  2838. */
  2839. static List
  2840. _send_and_recv_msgs(slurm_fd_t fd, slurm_msg_t *req, int timeout)
  2841. {
  2842. int retry = 0;
  2843. List ret_list = NULL;
  2844. int steps = 0;
  2845. if (!req->forward.timeout) {
  2846. if (!timeout)
  2847. timeout = slurm_get_msg_timeout() * 1000;
  2848. req->forward.timeout = timeout;
  2849. }
  2850. if (slurm_send_node_msg(fd, req) >= 0) {
  2851. if (req->forward.cnt > 0) {
  2852. /* figure out where we are in the tree and set
  2853. * the timeout for to wait for our childern
  2854. * correctly
  2855. * (timeout+message_timeout sec per step)
  2856. * to let the child timeout */
  2857. if (message_timeout < 0)
  2858. message_timeout = slurm_get_msg_timeout() * 1000;
  2859. steps = (req->forward.cnt+1)/slurm_get_tree_width();
  2860. timeout = (message_timeout*steps);
  2861. steps++;
  2862. timeout += (req->forward.timeout*steps);
  2863. }
  2864. ret_list = slurm_receive_msgs(fd, steps, timeout);
  2865. }
  2866. /*
  2867. * Attempt to close an open connection
  2868. */
  2869. while ((slurm_shutdown_msg_conn(fd) < 0) && (errno == EINTR) ) {
  2870. if (retry++ > MAX_SHUTDOWN_RETRY) {
  2871. break;
  2872. }
  2873. }
  2874. return ret_list;
  2875. }
  2876. /*
  2877. * slurm_send_recv_controller_msg
  2878. * opens a connection to the controller, sends the controller a message,
  2879. * listens for the response, then closes the connection
  2880. * IN request_msg - slurm_msg request
  2881. * OUT response_msg - slurm_msg response
  2882. * RET int - returns 0 on success, -1 on failure and sets errno
  2883. */
  2884. int slurm_send_recv_controller_msg(slurm_msg_t *req, slurm_msg_t *resp)
  2885. {
  2886. slurm_fd_t fd = -1;
  2887. int rc = 0;
  2888. time_t start_time = time(NULL);
  2889. int retry = 1;
  2890. slurm_ctl_conf_t *conf;
  2891. bool backup_controller_flag;
  2892. uint16_t slurmctld_timeout;
  2893. slurm_addr_t ctrl_addr;
  2894. /* Just in case the caller didn't initialize his slurm_msg_t, and
  2895. * since we KNOW that we are only sending to one node (the controller),
  2896. * we initialize some forwarding variables to disable forwarding.
  2897. */
  2898. forward_init(&req->forward, NULL);
  2899. req->ret_list = NULL;
  2900. req->forward_struct = NULL;
  2901. if (working_cluster_rec)
  2902. req->flags |= SLURM_GLOBAL_AUTH_KEY;
  2903. if ((fd = slurm_open_controller_conn(&ctrl_addr)) < 0) {
  2904. rc = -1;
  2905. goto cleanup;
  2906. }
  2907. conf = slurm_conf_lock();
  2908. backup_controller_flag = conf->backup_controller ? true : false;
  2909. slurmctld_timeout = conf->slurmctld_timeout;
  2910. slurm_conf_unlock();
  2911. while (retry) {
  2912. /* If the backup controller is in the process of assuming
  2913. * control, we sleep and retry later */
  2914. retry = 0;
  2915. rc = _send_and_recv_msg(fd, req, resp, 0);
  2916. if (resp->auth_cred)
  2917. g_slurm_auth_destroy(resp->auth_cred);
  2918. else
  2919. rc = -1;
  2920. if ((rc == 0) && (!working_cluster_rec)
  2921. && (resp->msg_type == RESPONSE_SLURM_RC)
  2922. && ((((return_code_msg_t *) resp->data)->return_code)
  2923. == ESLURM_IN_STANDBY_MODE)
  2924. && (backup_controller_flag)
  2925. && (difftime(time(NULL), start_time)
  2926. < (slurmctld_timeout + (slurmctld_timeout / 2)))) {
  2927. debug("Neither primary nor backup controller "
  2928. "responding, sleep and retry");
  2929. slurm_free_return_code_msg(resp->data);
  2930. sleep(30);
  2931. if ((fd = slurm_open_controller_conn(&ctrl_addr))
  2932. < 0) {
  2933. rc = -1;
  2934. } else {
  2935. retry = 1;
  2936. }
  2937. }
  2938. if (rc == -1)
  2939. break;
  2940. }
  2941. cleanup:
  2942. if (rc != 0)
  2943. _remap_slurmctld_errno();
  2944. return rc;
  2945. }
  2946. /* slurm_send_recv_node_msg
  2947. * opens a connection to node, sends the node a message, listens
  2948. * for the response, then closes the connection
  2949. * IN request_msg - slurm_msg request
  2950. * OUT response_msg - slurm_msg response
  2951. * IN timeout - how long to wait in milliseconds
  2952. * RET int - returns 0 on success, -1 on failure and sets errno
  2953. */
  2954. int slurm_send_recv_node_msg(slurm_msg_t *req, slurm_msg_t *resp, int timeout)
  2955. {
  2956. slurm_fd_t fd = -1;
  2957. resp->auth_cred = NULL;
  2958. if ((fd = slurm_open_msg_conn(&req->address)) < 0)
  2959. return -1;
  2960. return _send_and_recv_msg(fd, req, resp, timeout);
  2961. }
  2962. /* slurm_send_only_controller_msg
  2963. * opens a connection to the controller, sends the controller a
  2964. * message then, closes the connection
  2965. * IN request_msg - slurm_msg request
  2966. * RET int - return code
  2967. * NOTE: NOT INTENDED TO BE CROSS-CLUSTER
  2968. */
  2969. int slurm_send_only_controller_msg(slurm_msg_t *req)
  2970. {
  2971. int rc = SLURM_SUCCESS;
  2972. int retry = 0;
  2973. slurm_fd_t fd = -1;
  2974. slurm_addr_t ctrl_addr;
  2975. /*
  2976. * Open connection to SLURM controller:
  2977. */
  2978. if ((fd = slurm_open_controller_conn(&ctrl_addr)) < 0) {
  2979. rc = SLURM_SOCKET_ERROR;
  2980. goto cleanup;
  2981. }
  2982. if ((rc = slurm_send_node_msg(fd, req) < 0)) {
  2983. rc = SLURM_ERROR;
  2984. } else {
  2985. debug3("slurm_send_only_controller_msg: sent %d", rc);
  2986. rc = SLURM_SUCCESS;
  2987. }
  2988. /*
  2989. * Attempt to close an open connection
  2990. */
  2991. while ( (slurm_shutdown_msg_conn(fd) < 0) && (errno == EINTR) ) {
  2992. if (retry++ > MAX_SHUTDOWN_RETRY) {
  2993. rc = SLURM_SOCKET_ERROR;
  2994. goto cleanup;
  2995. }
  2996. }
  2997. cleanup:
  2998. if (rc != SLURM_SUCCESS)
  2999. _remap_slurmctld_errno();
  3000. return rc;
  3001. }
  3002. /*
  3003. * Open a connection to the "address" specified in the slurm msg `req'
  3004. * Then, immediately close the connection w/out waiting for a reply.
  3005. *
  3006. * Returns SLURM_SUCCESS on success SLURM_FAILURE (< 0) for failure.
  3007. */
  3008. int slurm_send_only_node_msg(slurm_msg_t *req)
  3009. {
  3010. int rc = SLURM_SUCCESS;
  3011. int retry = 0;
  3012. slurm_fd_t fd = -1;
  3013. if ((fd = slurm_open_msg_conn(&req->address)) < 0) {
  3014. return SLURM_SOCKET_ERROR;
  3015. }
  3016. if ((rc = slurm_send_node_msg(fd, req) < 0)) {
  3017. rc = SLURM_ERROR;
  3018. } else {
  3019. debug3("slurm_send_only_node_msg: sent %d", rc);
  3020. rc = SLURM_SUCCESS;
  3021. }
  3022. /*
  3023. * Attempt to close an open connection
  3024. */
  3025. while ( (slurm_shutdown_msg_conn(fd) < 0) && (errno == EINTR) ) {
  3026. if (retry++ > MAX_SHUTDOWN_RETRY)
  3027. return SLURM_SOCKET_ERROR;
  3028. }
  3029. return rc;
  3030. }
  3031. /*
  3032. * Send a message to the nodelist specificed using fanout
  3033. * Then return List containing type (ret_data_info_t).
  3034. * IN nodelist - list of nodes to send to.
  3035. * IN msg - a slurm_msg struct to be sent by the function
  3036. * IN timeout - how long to wait in milliseconds
  3037. * IN quiet - if set, reduce logging details
  3038. * RET List - List containing the responses of the childern
  3039. * (if any) we forwarded the message to. List
  3040. * containing type (ret_data_info_t).
  3041. */
  3042. List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg,
  3043. int timeout, bool quiet)
  3044. {
  3045. List ret_list = NULL;
  3046. hostlist_t hl = NULL;
  3047. if (!nodelist || !strlen(nodelist)) {
  3048. error("slurm_send_recv_msgs: no nodelist given");
  3049. return NULL;
  3050. }
  3051. hl = hostlist_create(nodelist);
  3052. if (!hl) {
  3053. error("slurm_send_recv_msgs: problem creating hostlist");
  3054. return NULL;
  3055. }
  3056. ret_list = start_msg_tree(hl, msg, timeout);
  3057. hostlist_destroy(hl);
  3058. return ret_list;
  3059. }
  3060. /*
  3061. * Send a message to msg->address
  3062. * Then return List containing type (ret_data_info_t).
  3063. * IN msg - a slurm_msg struct to be sent by the function
  3064. * IN timeout - how long to wait in milliseconds
  3065. * RET List - List containing the responses of the childern
  3066. * (if any) we forwarded the message to. List
  3067. * containing type (ret_types_t).
  3068. */
  3069. List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout)
  3070. {
  3071. static uint16_t conn_timeout = (uint16_t) NO_VAL;
  3072. List ret_list = NULL;
  3073. slurm_fd_t fd = -1;
  3074. ret_data_info_t *ret_data_info = NULL;
  3075. ListIterator itr;
  3076. int i;
  3077. if (conn_timeout == (uint16_t) NO_VAL)
  3078. conn_timeout = MIN(slurm_get_msg_timeout(), 10);
  3079. /* This connect retry logic permits Slurm hierarchical communications
  3080. * to better survive slurmd restarts */
  3081. for (i = 0; i <= conn_timeout; i++) {
  3082. if (i > 0)
  3083. sleep(1);
  3084. fd = slurm_open_msg_conn(&msg->address);
  3085. if ((fd >= 0) || (errno != ECONNREFUSED))
  3086. break;
  3087. if (i == 0)
  3088. debug3("connect refused, retrying");
  3089. }
  3090. if (fd < 0) {
  3091. mark_as_failed_forward(&ret_list, name,
  3092. SLURM_COMMUNICATIONS_CONNECTION_ERROR);
  3093. errno = SLURM_COMMUNICATIONS_CONNECTION_ERROR;
  3094. return ret_list;
  3095. }
  3096. msg->ret_list = NULL;
  3097. msg->forward_struct = NULL;
  3098. if (!(ret_list = _send_and_recv_msgs(fd, msg, timeout))) {
  3099. mark_as_failed_forward(&ret_list, name, errno);
  3100. errno = SLURM_COMMUNICATIONS_CONNECTION_ERROR;
  3101. return ret_list;
  3102. } else {
  3103. itr = list_iterator_create(ret_list);
  3104. if (!itr)
  3105. fatal("list_iterator_create: malloc failure");
  3106. while ((ret_data_info = list_next(itr)))
  3107. if (!ret_data_info->node_name) {
  3108. ret_data_info->node_name = xstrdup(name);
  3109. }
  3110. list_iterator_destroy(itr);
  3111. }
  3112. return ret_list;
  3113. }
  3114. /*
  3115. * Open a connection to the "address" specified in the slurm msg "req".
  3116. * Then read back an "rc" message returning the "return_code" specified
  3117. * in the response in the "rc" parameter.
  3118. * IN req - a slurm_msg struct to be sent by the function
  3119. * OUT rc - return code from the sent message
  3120. * IN timeout - how long to wait in milliseconds
  3121. * RET int either 0 for success or -1 for failure.
  3122. */
  3123. int slurm_send_recv_rc_msg_only_one(slurm_msg_t *req, int *rc, int timeout)
  3124. {
  3125. slurm_fd_t fd = -1;
  3126. int ret_c = 0;
  3127. slurm_msg_t resp;
  3128. slurm_msg_t_init(&resp);
  3129. /* Just in case the caller didn't initialize his slurm_msg_t, and
  3130. * since we KNOW that we are only sending to one node,
  3131. * we initialize some forwarding variables to disable forwarding.
  3132. */
  3133. forward_init(&req->forward, NULL);
  3134. req->ret_list = NULL;
  3135. req->forward_struct = NULL;
  3136. if ((fd = slurm_open_msg_conn(&req->address)) < 0) {
  3137. return -1;
  3138. }
  3139. if (!_send_and_recv_msg(fd, req, &resp, timeout)) {
  3140. if (resp.auth_cred)
  3141. g_slurm_auth_destroy(resp.auth_cred);
  3142. *rc = slurm_get_return_code(resp.msg_type, resp.data);
  3143. slurm_free_msg_data(resp.msg_type, resp.data);
  3144. ret_c = 0;
  3145. } else
  3146. ret_c = -1;
  3147. return ret_c;
  3148. }
  3149. /*
  3150. * Send message to controller and get return code.
  3151. * Make use of slurm_send_recv_controller_msg(), which handles
  3152. * support for backup controller and retry during transistion.
  3153. */
  3154. int slurm_send_recv_controller_rc_msg(slurm_msg_t *req, int *rc)
  3155. {
  3156. int ret_c;
  3157. slurm_msg_t resp;
  3158. if (!slurm_send_recv_controller_msg(req, &resp)) {
  3159. *rc = slurm_get_return_code(resp.msg_type, resp.data);
  3160. slurm_free_msg_data(resp.msg_type, resp.data);
  3161. ret_c = 0;
  3162. } else {
  3163. ret_c = -1;
  3164. }
  3165. return ret_c;
  3166. }
  3167. /* this is used to set how many nodes are going to be on each branch
  3168. * of the tree.
  3169. * IN total - total number of nodes to send to
  3170. * IN tree_width - how wide the tree should be on each hop
  3171. * RET int * - int array tree_width in length each space
  3172. * containing the number of nodes to send to each hop
  3173. * on the span.
  3174. */
  3175. extern int *set_span(int total, uint16_t tree_width)
  3176. {
  3177. int *span = NULL;
  3178. int left = total;
  3179. int i = 0;
  3180. if (tree_width == 0)
  3181. tree_width = slurm_get_tree_width();
  3182. span = xmalloc(sizeof(int) * tree_width);
  3183. //info("span count = %d", tree_width);
  3184. if (total <= tree_width) {
  3185. return span;
  3186. }
  3187. while (left > 0) {
  3188. for(i = 0; i < tree_width; i++) {
  3189. if ((tree_width-i) >= left) {
  3190. if (span[i] == 0) {
  3191. left = 0;
  3192. break;
  3193. } else {
  3194. span[i] += left;
  3195. left = 0;
  3196. break;
  3197. }
  3198. } else if (left <= tree_width) {
  3199. span[i] += left;
  3200. left = 0;
  3201. break;
  3202. }
  3203. span[i] += tree_width;
  3204. left -= tree_width;
  3205. }
  3206. }
  3207. return span;
  3208. }
  3209. /*
  3210. * Free a slurm message
  3211. */
  3212. extern void slurm_free_msg(slurm_msg_t * msg)
  3213. {
  3214. if (msg->auth_cred)
  3215. (void) g_slurm_auth_destroy(msg->auth_cred);
  3216. if (msg->ret_list) {
  3217. list_destroy(msg->ret_list);
  3218. msg->ret_list = NULL;
  3219. }
  3220. xfree(msg);
  3221. }
  3222. extern char *nodelist_nth_host(const char *nodelist, int inx)
  3223. {
  3224. hostlist_t hl = hostlist_create(nodelist);
  3225. char *name = hostlist_nth(hl, inx);
  3226. hostlist_destroy(hl);
  3227. return name;
  3228. }
  3229. extern int nodelist_find(const char *nodelist, const char *name)
  3230. {
  3231. hostlist_t hl = hostlist_create(nodelist);
  3232. int id = hostlist_find(hl, name);
  3233. hostlist_destroy(hl);
  3234. return id;
  3235. }
  3236. extern void convert_num_unit(float num, char *buf, int buf_size, int orig_type)
  3237. {
  3238. char *unit = "\0KMGTP?";
  3239. int i = (int)num % 512;
  3240. if ((int)num == 0) {
  3241. snprintf(buf, buf_size, "%d", (int)num);
  3242. return;
  3243. } else if (i > 0) {
  3244. snprintf(buf, buf_size, "%d%c", (int)num, unit[orig_type]);
  3245. return;
  3246. }
  3247. while (num > 1024) {
  3248. num /= 1024;
  3249. orig_type++;
  3250. }
  3251. if (orig_type < UNIT_NONE || orig_type > UNIT_PETA)
  3252. orig_type = UNIT_UNKNOWN;
  3253. i = (int)num;
  3254. /* Here we are checking to see if these numbers are the same,
  3255. * meaning the float has not floating point. If we do have
  3256. * floating point print as a float.
  3257. */
  3258. if ((float)i == num)
  3259. snprintf(buf, buf_size, "%d%c", i, unit[orig_type]);
  3260. else
  3261. snprintf(buf, buf_size, "%.2f%c", num, unit[orig_type]);
  3262. }
  /*
   * revert_num_unit - convert a string such as "4K" or "2m" back to a number.
   * IN buf  - string holding a decimal integer, optionally followed by one
   *           of the unit letters K, M, G, T or P (either case) as its last
   *           character
   * RET int - -1 if buf is NULL; otherwise the leading integer (as parsed
   *           by atoi()) multiplied by 1024 once per unit step
   *           (K = 1024, M = 1024^2, ...). A result that does not fit in an
   *           int is clamped to the largest/smallest int value. An empty
   *           string yields 0.
   */
  extern int revert_num_unit(const char *buf)
  {
          static const char units[] = "KMGTP";
          /* int limits derived locally so no extra header is required */
          const int int_max = (int) (~0U >> 1);
          const int int_min = -int_max - 1;
          size_t len;
          int number, suffix;
          int power = 0;  /* how many times to multiply by 1024 */
          int k;

          if (!buf)
                  return -1;

          number = atoi(buf);

          /* An empty string has no last character to inspect */
          len = strlen(buf);
          if (len == 0)
                  return number;

          /* <ctype.h> functions need a value representable as unsigned char */
          suffix = toupper((unsigned char) buf[len - 1]);
          for (k = 0; units[k]; k++) {
                  if (suffix == units[k]) {
                          power = k + 1;
                          break;
                  }
          }

          /* Each unit is a further factor of 1024 (not i * 1024); clamp
           * instead of overflowing the int result. */
          for (k = 0; k < power; k++) {
                  if (number > (int_max / 1024))
                          return int_max;
                  if (number < (int_min / 1024))
                          return int_min;
                  number *= 1024;
          }

          return number;
  }
  #if _DEBUG
  /*
   * Debug helper: print a buffer to stdout as two-digit hex bytes, ten per
   * line, stopping after the byte at index 200.
   * IN data - bytes to dump
   * IN len  - number of bytes available in data
   */
  static void _print_data(char *data, int len)
  {
          int pos;

          for (pos = 0; (pos < len) && (pos <= 200); pos++) {
                  /* Start a new row before every tenth byte but the first */
                  if ((pos != 0) && ((pos % 10) == 0))
                          printf("\n");
                  printf("%2.2x ", ((int) data[pos] & 0xff));
          }
          printf("\n\n");
  }
  #endif
  3294. /*
  3295. * slurm_forward_data - forward arbitrary data to unix domain sockets on nodes
  3296. * IN nodelist: nodes to forward data to
  3297. * IN address: address of unix domain socket
  3298. * IN len: length of data
  3299. * IN data: real data
  3300. * RET: error code
  3301. */
  3302. extern int
  3303. slurm_forward_data(char *nodelist, char *address, uint32_t len, char *data)
  3304. {
  3305. List ret_list = NULL;
  3306. int temp_rc = 0, rc = 0;
  3307. ret_data_info_t *ret_data_info = NULL;
  3308. slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
  3309. forward_data_msg_t req;
  3310. slurm_msg_t_init(msg);
  3311. debug("slurm_forward_data: nodelist=%s, address=%s, len=%u",
  3312. nodelist, address, len);
  3313. req.address = address;
  3314. req.len = len;
  3315. req.data = data;
  3316. msg->msg_type = REQUEST_FORWARD_DATA;
  3317. msg->data = &req;
  3318. if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
  3319. while ((ret_data_info = list_pop(ret_list))) {
  3320. temp_rc = slurm_get_return_code(ret_data_info->type,
  3321. ret_data_info->data);
  3322. if (temp_rc)
  3323. rc = temp_rc;
  3324. }
  3325. } else {
  3326. error("slurm_forward_data: no list was returned");
  3327. rc = SLURM_ERROR;
  3328. }
  3329. slurm_free_msg(msg);
  3330. return rc;
  3331. }
  3332. /*
  3333. * vi: shiftwidth=8 tabstop=8 expandtab
  3334. */