PageRenderTime 77ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/src/api/pmi.c

https://github.com/cfenoy/slurm
C | 1907 lines | 960 code | 189 blank | 758 comment | 359 complexity | a52e79ef3c183f9010fb5a0327656a49 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /*****************************************************************************\
  2. * pmi.c - Process Management Interface for MPICH2
  3. * See http://www-unix.mcs.anl.gov/mpi/mpich2/
  4. *
  5. * NOTE: Dynamic Process Management functions (PMI part 2) are not supported
  6. * at this time. Functions required for MPI-1 (PMI part 1) are supported.
  7. *****************************************************************************
  8. * COPYRIGHT: For the function definitions
  9. *
  10. * The following is a notice of limited availability of the code, and
  11. * disclaimer which must be included in the prologue of the code and in all
  12. * source listings of the code.
  13. *
  14. * Copyright Notice + 2002 University of Chicago
  15. *
  16. * Permission is hereby granted to use, reproduce, prepare derivative
  17. * works, and to redistribute to others. This software was authored by:
  18. *
  19. * Argonne National Laboratory Group
  20. * W. Gropp: (630) 252-4318; FAX: (630) 252-5986; e-mail: gropp@mcs.anl.gov
  21. * E. Lusk: (630) 252-7852; FAX: (630) 252-5986; e-mail: lusk@mcs.anl.gov
  22. * Mathematics and Computer Science Division Argonne National Laboratory,
  23. * Argonne IL 60439
  24. *
  25. * GOVERNMENT LICENSE
  26. *
  27. * Portions of this material resulted from work developed under a U.S.
  28. * Government Contract and are subject to the following license: the
  29. * Government is granted for itself and others acting on its behalf a
  30. * paid-up, nonexclusive, irrevocable worldwide license in this computer
  31. * software to reproduce, prepare derivative works, and perform publicly
  32. * and display publicly.
  33. *
  34. * DISCLAIMER
  35. *
  36. * This computer code material was prepared, in part, as an account of work
  37. * sponsored by an agency of the United States Government. Neither the
  38. * United States, nor the University of Chicago, nor any of their
  39. * employees, makes any warranty express or implied, or assumes any legal
  40. * liability or responsibility for the accuracy, completeness, or
  41. * usefulness of any information, apparatus, product, or process disclosed,
  42. * or represents that its use would not infringe privately owned rights.
  43. *
  44. * MCS Division <http://www.mcs.anl.gov> Argonne National Laboratory
  45. * <http://www.anl.gov> University of Chicago <http://www.uchicago.edu>
  46. *****************************************************************************
  47. * COPYRIGHT: For the implementation of the functions
  48. *
  49. * Copyright (C) 2005-2007 The Regents of the University of California.
  50. * Copyright (C) 2008-2010 Lawrence Livermore National Security.
  51. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  52. * Written by Morris Jette <jette1@llnl.gov>
  53. * CODE-OCEC-09-009. All rights reserved.
  54. *
  55. * This file is part of SLURM, a resource management program.
  56. * For details, see <http://www.schedmd.com/slurmdocs/>.
  57. * Please also read the included file: DISCLAIMER.
  58. *
  59. * SLURM is free software; you can redistribute it and/or modify it under
  60. * the terms of the GNU General Public License as published by the Free
  61. * Software Foundation; either version 2 of the License, or (at your option)
  62. * any later version.
  63. *
  64. * In addition, as a special exception, the copyright holders give permission
  65. * to link the code of portions of this program with the OpenSSL library under
  66. * certain conditions as described in each individual source file, and
  67. * distribute linked combinations including the two. You must obey the GNU
  68. * General Public License in all respects for all of the code used other than
  69. * OpenSSL. If you modify file(s) with this exception, you may extend this
  70. * exception to your version of the file(s), but you are not obligated to do
  71. * so. If you do not wish to do so, delete this exception statement from your
  72. * version. If you delete this exception statement from all source files in
  73. * the program, then also delete it here.
  74. *
  75. * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
  76. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  77. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  78. * details.
  79. *
  80. * You should have received a copy of the GNU General Public License along
  81. * with SLURM; if not, write to the Free Software Foundation, Inc.,
  82. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  83. \*****************************************************************************/
  84. #ifndef _GNU_SOURCE
  85. # define _GNU_SOURCE
  86. #endif
  87. #include <pthread.h>
  88. #include <signal.h>
  89. #include <stdio.h>
  90. #include <stdlib.h>
  91. #include <string.h>
  92. #include "slurm/pmi.h"
  93. #include "slurm/slurm.h"
  94. #include "slurm/slurm_errno.h"
  95. #include "src/api/slurm_pmi.h"
  96. #include "src/common/macros.h"
  97. #include "src/common/malloc.h"
  98. #include "src/slurmd/slurmstepd/slurmstepd_job.h"
  /* States of a whole keyval space (stored in struct kvs_rec.kvs_state) */
  #define KVS_STATE_LOCAL       0   /* state given to a record by _init_kvs() */
  #define KVS_STATE_DEFUNCT     1   /* state set by PMI_KVS_Destroy() */

  /* States of a single key (stored in struct kvs_rec.kvs_key_states[]) */
  #define KVS_KEY_STATE_GLOBAL  0
  #define KVS_KEY_STATE_LOCAL   1   /* set by _kvs_put() when called with local != 0 */
  /*
   * One locally cached keyval space.
   * Default key names form is jobid.stepid[.taskid.sequence]
   *
   * kvs_keys[] and kvs_values[] are indexed in parallel (entry i of one
   * belongs to entry i of the other); kvs_cnt is the number of entries.
   */
  struct kvs_rec {
      char      *kvs_name;        /* name of this keyval space */
      uint16_t   kvs_state;       /* see KVS_STATE_* */
      uint32_t   kvs_cnt;         /* count of key-pairs */
      uint16_t   kvs_inx;         /* iteration index */
      uint16_t  *kvs_key_states;  /* per-key state, see KVS_KEY_STATE_* */
      char     **kvs_keys;        /* key strings */
      char     **kvs_values;      /* value strings */
  };
  /* NOTE(review): the code conditioned on _DEBUG is outside this view */
  #define _DEBUG 0

  /* Forward declarations of file-local helpers */
  static void        _del_kvs_rec(struct kvs_rec *kvs_ptr);
  static void        _init_kvs(char kvsname[]);
  static inline void _kvs_dump(void);
  static int         _kvs_put(const char kvsname[], const char key[],
                              const char value[], int local);
  static void        _kvs_swap(struct kvs_rec *kvs_ptr, int inx1, int inx2);
  static int         IsPmiKey(char *);

  /* Global variables */

  /* Process identity; filled in from the environment by PMI_Init() */
  long pmi_jobid;       /* SLURM_JOB_ID, else 0 */
  long pmi_stepid;      /* SLURM_STEPID, else 0 */
  int  pmi_init = 0;    /* set to 1 by PMI_Init(), reset by PMI_Finalize() */
  int  pmi_size;        /* SLURM_NPROCS or PMI_SIZE, else 1 */
  int  pmi_spawned;     /* PMI_SPAWNED, else 0 */
  int  pmi_rank;        /* SLURM_PROCID or PMI_RANK, else 0 */
  int  pmi_debug;       /* PMI_DEBUG, else 0; non-zero enables tracing to stderr */

  /* Non-zero when SLURM_PMI_KVS_NO_DUP_KEYS is present in the environment;
   * _kvs_put() then skips its search for an already existing key */
  static int pmi_kvs_no_dup_keys = 0;

  /* Local cache of keyval spaces; kvs_mutex is held around accesses to it */
  static pthread_mutex_t kvs_mutex = PTHREAD_MUTEX_INITIALIZER;
  int             kvs_rec_cnt = 0;        /* number of entries in kvs_recs[] */
  struct kvs_rec *kvs_recs;               /* array of keyval spaces */
  int             kvs_name_sequence = 0;  /* last name component used by PMI_KVS_Create() */

  static char *pmi_opt_str =
      "pmi command line options \n"
      " \n"
      " \n"
      " \n";
  139. /* PMI Group functions */
  140. /*@
  141. PMI_Init - initialize the Process Manager Interface
  142. Output Parameter:
  143. . spawned - spawned flag
  144. Return values:
  145. + PMI_SUCCESS - initialization completed successfully
  146. . PMI_ERR_INVALID_ARG - invalid argument
  147. - PMI_FAIL - initialization failed
  148. Notes:
  149. Initialize PMI for this process group. The value of spawned indicates whether
  150. this process was created by 'PMI_Spawn_multiple'. 'spawned' will be 'PMI_TRUE'
  151. if this process group has a parent and 'PMI_FALSE' if it does not.
  152. @*/
  153. int PMI_Init( int *spawned )
  154. {
  155. char *env;
  156. env = getenv("PMI_DEBUG");
  157. if (env)
  158. pmi_debug = atoi(env);
  159. else
  160. pmi_debug = 0;
  161. if (pmi_debug)
  162. fprintf(stderr, "In: PMI_Init\n");
  163. env = getenv("SLURM_PMI_KVS_NO_DUP_KEYS");
  164. if (env)
  165. pmi_kvs_no_dup_keys = 1;
  166. else
  167. pmi_kvs_no_dup_keys = 0;
  168. if (spawned == NULL)
  169. return PMI_ERR_INVALID_ARG;
  170. if (pmi_init)
  171. goto replay;
  172. env = getenv("SLURM_JOB_ID");
  173. if (env)
  174. pmi_jobid = atoi(env);
  175. else
  176. pmi_jobid = 0;
  177. env = getenv("SLURM_STEPID");
  178. if (env)
  179. pmi_stepid = atoi(env);
  180. else
  181. pmi_stepid = 0;
  182. env = getenv("PMI_SPAWNED");
  183. if (env)
  184. pmi_spawned = atoi(env);
  185. else
  186. pmi_spawned = 0;
  187. env = getenv("SLURM_NPROCS");
  188. if (!env)
  189. env = getenv("PMI_SIZE");
  190. if (env)
  191. pmi_size = atoi(env);
  192. else
  193. pmi_size = 1;
  194. env = getenv("SLURM_PROCID");
  195. if (!env)
  196. env = getenv("PMI_RANK");
  197. if (env)
  198. pmi_rank = atoi(env);
  199. else
  200. pmi_rank = 0;
  201. pmi_init = 1;
  202. replay: if (pmi_spawned)
  203. *spawned = PMI_TRUE;
  204. else
  205. *spawned = PMI_FALSE;
  206. return PMI_SUCCESS;
  207. }
  208. /*@
  209. PMI_Initialized - check if PMI has been initialized
  210. Output Parameter:
  211. . initialized - boolean value
  212. Return values:
  213. + PMI_SUCCESS - initialized successfully set
  214. . PMI_ERR_INVALID_ARG - invalid argument
  215. - PMI_FAIL - unable to set the variable
  216. Notes:
  217. On successful output, initialized will either be 'PMI_TRUE' or 'PMI_FALSE'.
  218. + PMI_TRUE - initialize has been called.
  219. - PMI_FALSE - initialize has not been called or previously failed.
  220. @*/
  221. int PMI_Initialized( PMI_BOOL *initialized )
  222. {
  223. if (pmi_debug)
  224. fprintf(stderr, "In: PMI_Initialized\n");
  225. if (initialized == NULL)
  226. return PMI_ERR_INVALID_ARG;
  227. if (pmi_init)
  228. *initialized = PMI_TRUE;
  229. else
  230. *initialized = PMI_FALSE;
  231. return PMI_SUCCESS;
  232. }
  233. /*@
  234. PMI_Finalize - finalize the Process Manager Interface
  235. Return values:
  236. + PMI_SUCCESS - finalization completed successfully
  237. - PMI_FAIL - finalization failed
  238. Notes:
  239. Finalize PMI for this process group.
  240. @*/
  241. int PMI_Finalize( void )
  242. {
  243. int i;
  244. if (pmi_debug)
  245. fprintf(stderr, "In: PMI_Finalize\n");
  246. pmi_init = 0;
  247. pthread_mutex_lock(&kvs_mutex);
  248. for (i=0; i<kvs_rec_cnt; i++)
  249. _del_kvs_rec(&kvs_recs[i]);
  250. if (kvs_recs)
  251. free(kvs_recs);
  252. kvs_recs = NULL;
  253. kvs_rec_cnt = 0;
  254. pthread_mutex_unlock(&kvs_mutex);
  255. slurm_pmi_finalize();
  256. return PMI_SUCCESS;
  257. }
  258. static void _del_kvs_rec(struct kvs_rec *kvs_ptr)
  259. {
  260. int i;
  261. if (kvs_ptr == NULL)
  262. return;
  263. for (i=0; i<kvs_ptr->kvs_cnt; i++) {
  264. if (kvs_ptr->kvs_keys[i])
  265. free(kvs_ptr->kvs_keys[i]);
  266. if (kvs_ptr->kvs_values[i])
  267. free(kvs_ptr->kvs_values[i]);
  268. }
  269. if (kvs_ptr->kvs_name)
  270. free(kvs_ptr->kvs_name);
  271. return;
  272. }
  273. /*@
  274. PMI_Get_size - obtain the size of the process group
  275. Output Parameters:
  276. . size - pointer to an integer that receives the size of the process group
  277. Return values:
  278. + PMI_SUCCESS - size successfully obtained
  279. . PMI_ERR_INVALID_ARG - invalid argument
  280. - PMI_FAIL - unable to return the size
  281. Notes:
  282. This function returns the size of the process group to which the local process
  283. belongs.
  284. @*/
  285. int PMI_Get_size( int *size )
  286. {
  287. if (pmi_debug)
  288. fprintf(stderr, "In: PMI_Get_size\n");
  289. if (size == NULL)
  290. return PMI_ERR_INVALID_ARG;
  291. if (pmi_init == 0)
  292. return PMI_FAIL;
  293. *size = pmi_size;
  294. return PMI_SUCCESS;
  295. }
  296. /*@
  297. PMI_Get_rank - obtain the rank of the local process in the process group
  298. Output Parameters:
  299. . rank - pointer to an integer that receives the rank in the process group
  300. Return values:
  301. + PMI_SUCCESS - rank successfully obtained
  302. . PMI_ERR_INVALID_ARG - invalid argument
  303. - PMI_FAIL - unable to return the rank
  304. Notes:
  305. This function returns the rank of the local process in its process group.
  306. @*/
  307. int PMI_Get_rank( int *rank )
  308. {
  309. if (pmi_debug)
  310. fprintf(stderr, "In: PMI_Get_rank\n");
  311. if (rank == NULL)
  312. return PMI_ERR_INVALID_ARG;
  313. if (pmi_init == 0)
  314. return PMI_FAIL;
  315. *rank = pmi_rank;
  316. return PMI_SUCCESS;
  317. }
  318. /*@
  319. PMI_Get_universe_size - obtain the universe size
  320. (NOTE: "universe size" indicates the maximum recommended
  321. process count for the job.)
  322. Output Parameters:
  323. . size - pointer to an integer that receives the size
  324. Return values:
  325. + PMI_SUCCESS - size successfully obtained
  326. . PMI_ERR_INVALID_ARG - invalid argument
  327. - PMI_FAIL - unable to return the size
  328. @*/
  329. int PMI_Get_universe_size( int *size )
  330. {
  331. if (pmi_debug)
  332. fprintf(stderr, "In: PMI_Get_universe_size\n");
  333. if (size == NULL)
  334. return PMI_ERR_INVALID_ARG;
  335. if (pmi_init == 0)
  336. return PMI_FAIL;
  337. *size = pmi_size;
  338. return PMI_SUCCESS;
  339. }
  340. /*@
  341. PMI_Get_appnum - obtain the application number
  342. Output parameters:
  343. . appnum - pointer to an integer that receives the appnum
  344. Return values:
  345. + PMI_SUCCESS - appnum successfully obtained
  346. . PMI_ERR_INVALID_ARG - invalid argument
  347. - PMI_FAIL - unable to return the appnum
  348. @*/
  349. int PMI_Get_appnum( int *appnum )
  350. {
  351. if (pmi_debug)
  352. fprintf(stderr, "In: PMI_Get_appnum\n");
  353. if (appnum == NULL)
  354. return PMI_ERR_INVALID_ARG;
  355. if (pmi_init == 0)
  356. return PMI_FAIL;
  357. *appnum = pmi_jobid;
  358. return PMI_SUCCESS;
  359. }
  360. /*@
  361. PMI_Publish_name - publish a name
  362. Input parameters:
  363. . service_name - string representing the service being published
  364. . port - string representing the port on which to contact the service
  365. Return values:
  366. + PMI_SUCCESS - port for service successfully published
  367. . PMI_ERR_INVALID_ARG - invalid argument
  368. - PMI_FAIL - unable to publish service
  369. @*/
  370. int PMI_Publish_name( const char service_name[], const char port[] )
  371. {
  372. if (pmi_debug)
  373. fprintf(stderr, "In: PMI_Publish_name - NOT SUPPORTED\n");
  374. if ((service_name == NULL) || (port == NULL))
  375. return PMI_ERR_INVALID_ARG;
  376. /* FIXME */
  377. return PMI_FAIL;
  378. }
  379. /*@
  380. PMI_Unpublish_name - unpublish a name
  381. Input parameters:
  382. . service_name - string representing the service being unpublished
  383. Return values:
  384. + PMI_SUCCESS - service successfully unpublished
  385. . PMI_ERR_INVALID_ARG - invalid argument
  386. - PMI_FAIL - unable to unpublish service
  387. @*/
  388. int PMI_Unpublish_name( const char service_name[] )
  389. {
  390. if (pmi_debug)
  391. fprintf(stderr, "In: PMI_Unpublish_name - NOT SUPPORTED\n");
  392. if (service_name == NULL)
  393. return PMI_ERR_INVALID_ARG;
  394. /* FIXME */
  395. return PMI_FAIL;
  396. }
  397. /*@
  398. PMI_Lookup_name - lookup a service by name
  399. Input parameters:
  400. . service_name - string representing the service being looked up
  401. Output parameters:
  402. . port - string representing the port on which to contact the service
  403. Return values:
  404. + PMI_SUCCESS - port for service successfully obtained
  405. . PMI_ERR_INVALID_ARG - invalid argument
  406. - PMI_FAIL - unable to lookup service
  407. @*/
  408. int PMI_Lookup_name( const char service_name[], char port[] )
  409. {
  410. if (pmi_debug)
  411. fprintf(stderr, "In: PMI_Lookup_name - NOT SUPPORTED\n");
  412. if ((service_name == NULL) || (port == NULL))
  413. return PMI_ERR_INVALID_ARG;
  414. /* FIXME */
  415. return PMI_FAIL;
  416. }
  417. /*@
  418. PMI_Get_id - obtain the id of the process group
  419. Input Parameter:
  420. . length - length of the id_str character array
  421. Output Parameter:
  422. . id_str - character array that receives the id of the process group
  423. Return values:
  424. + PMI_SUCCESS - id successfully obtained
  425. . PMI_ERR_INVALID_ARG - invalid id_str argument
  426. . PMI_ERR_INVALID_LENGTH - invalid length argument
  427. - PMI_FAIL - unable to return the id
  428. Notes:
  429. This function returns a string that uniquely identifies the process group
  430. that the local process belongs to. The string passed in must be at least
  431. as long as the number returned by 'PMI_Get_id_length_max()'.
  432. @*/
  433. int PMI_Get_id( char id_str[], int length )
  434. {
  435. if (pmi_debug)
  436. fprintf(stderr, "In: PMI_Get_id\n");
  437. if (length < PMI_MAX_ID_LEN)
  438. return PMI_ERR_INVALID_LENGTH;
  439. if (id_str == NULL)
  440. return PMI_ERR_INVALID_ARG;
  441. if (pmi_init == 0)
  442. return PMI_FAIL;
  443. snprintf(id_str, length, "%ld.%ld", pmi_jobid, pmi_stepid);
  444. return PMI_SUCCESS;
  445. }
  446. /*@
  447. PMI_Get_kvs_domain_id - obtain the id of the PMI domain
  448. Input Parameter:
  449. . length - length of id_str character array
  450. Output Parameter:
  451. . id_str - character array that receives the id of the PMI domain
  452. Return values:
  453. + PMI_SUCCESS - id successfully obtained
  454. . PMI_ERR_INVALID_ARG - invalid argument
  455. . PMI_ERR_INVALID_LENGTH - invalid length argument
  456. - PMI_FAIL - unable to return the id
  457. Notes:
  458. This function returns a string that uniquely identifies the PMI domain
  459. where keyval spaces can be shared. The string passed in must be at least
  460. as long as the number returned by 'PMI_Get_id_length_max()'.
  461. @*/
  462. int PMI_Get_kvs_domain_id( char id_str[], int length )
  463. {
  464. if (pmi_debug)
  465. fprintf(stderr, "In: PMI_Get_kvs_domain_id\n");
  466. if (length < PMI_MAX_ID_LEN)
  467. return PMI_ERR_INVALID_LENGTH;
  468. if (id_str == NULL)
  469. return PMI_ERR_INVALID_ARG;
  470. if (pmi_init == 0)
  471. return PMI_FAIL;
  472. snprintf(id_str, length, "%ld.%ld", pmi_jobid, pmi_stepid);
  473. return PMI_SUCCESS;
  474. }
  475. /*@
  476. PMI_Get_id_length_max - obtain the maximum length of an id string
  477. Output Parameters:
  478. . length - the maximum length of an id string
  479. Return values:
  480. + PMI_SUCCESS - length successfully set
  481. . PMI_ERR_INVALID_ARG - invalid argument
  482. - PMI_FAIL - unable to return the maximum length
  483. Notes:
  484. This function returns the maximum length of a process group id string.
  485. @*/
  486. int PMI_Get_id_length_max( int *length )
  487. {
  488. if (pmi_debug)
  489. fprintf(stderr, "In: PMI_Get_id_length_max\n");
  490. if (length == NULL)
  491. return PMI_ERR_INVALID_ARG;
  492. *length = PMI_MAX_ID_LEN;
  493. return PMI_SUCCESS;
  494. }
  495. /*@
  496. PMI_Barrier - barrier across the process group
  497. Return values:
  498. + PMI_SUCCESS - barrier successfully finished
  499. - PMI_FAIL - barrier failed
  500. Notes:
  501. This function is a collective call across all processes in the process group
  502. the local process belongs to. It will not return until all the processes
  503. have called 'PMI_Barrier()'.
  504. @*/
  505. int PMI_Barrier( void )
  506. {
  507. struct kvs_comm_set *kvs_set_ptr = NULL;
  508. struct kvs_comm *kvs_ptr;
  509. int i, j, k, rc = PMI_SUCCESS;
  510. if (pmi_debug)
  511. fprintf(stderr, "In: PMI_Barrier\n");
  512. if (pmi_init == 0)
  513. return PMI_FAIL;
  514. /* Simple operation without srun (no-op) */
  515. if ((pmi_jobid == 0) && (pmi_stepid == 0))
  516. return rc;
  517. /* Issue the RPC */
  518. if (slurm_get_kvs_comm_set(&kvs_set_ptr, pmi_rank, pmi_size)
  519. != SLURM_SUCCESS)
  520. return PMI_FAIL;
  521. if (kvs_set_ptr == NULL)
  522. return PMI_SUCCESS;
  523. if (pmi_debug)
  524. fprintf(stderr, "Past PMI_Barrier\n");
  525. for (i=0; i<kvs_set_ptr->kvs_comm_recs; i++) {
  526. kvs_ptr = kvs_set_ptr->kvs_comm_ptr[i];
  527. for (j=0; j<kvs_ptr->kvs_cnt; j++) {
  528. k = _kvs_put(kvs_ptr->kvs_name,
  529. kvs_ptr->kvs_keys[j],
  530. kvs_ptr->kvs_values[j],
  531. 0);
  532. if (k != PMI_SUCCESS)
  533. rc = k;
  534. }
  535. }
  536. /* Release temporary storage from RPC */
  537. slurm_free_kvs_comm_set(kvs_set_ptr);
  538. return rc;
  539. }
  540. /*@
  541. PMI_Get_clique_size - obtain the number of processes on the local node
  542. Output Parameters:
  543. . size - pointer to an integer that receives the size of the clique
  544. Return values:
  545. + PMI_SUCCESS - size successfully obtained
  546. . PMI_ERR_INVALID_ARG - invalid argument
  547. - PMI_FAIL - unable to return the clique size
  548. Notes:
  549. This function returns the number of processes in the local process group that
  550. are on the local node along with the local process. This is a simple topology
  551. function to distinguish between processes that can communicate through IPC
  552. mechanisms (e.g., shared memory) and other network mechanisms.
  553. @*/
  554. int PMI_Get_clique_size( int *size )
  555. {
  556. char *env;
  557. if (pmi_debug)
  558. fprintf(stderr, "In: PMI_Get_clique_size\n");
  559. if (size == NULL)
  560. return PMI_ERR_INVALID_ARG;
  561. if (pmi_init == 0)
  562. return PMI_FAIL;
  563. /* Simple operation without srun */
  564. if ((pmi_jobid == 0) && (pmi_stepid == 0)) {
  565. *size = 1;
  566. return PMI_SUCCESS;
  567. }
  568. env = getenv("SLURM_GTIDS");
  569. if (env) {
  570. int i, tids=1;
  571. for (i=0; env[i]; i++) {
  572. if (env[i] == ',')
  573. tids++;
  574. }
  575. *size = tids;
  576. return PMI_SUCCESS;
  577. }
  578. return PMI_FAIL;
  579. }
  580. /*@
  581. PMI_Get_clique_ranks - get the ranks of the local processes in the process group
  582. Input Parameters:
  583. . length - length of the ranks array
  584. Output Parameters:
  585. . ranks - pointer to an array of integers that receive the local ranks
  586. Return values:
  587. + PMI_SUCCESS - ranks successfully obtained
  588. . PMI_ERR_INVALID_ARG - invalid argument
  589. . PMI_ERR_INVALID_LENGTH - invalid length argument
  590. - PMI_FAIL - unable to return the ranks
  591. Notes:
  592. This function returns the ranks of the processes on the local node. The array
  593. must be at least as large as the size returned by 'PMI_Get_clique_size()'. This
  594. is a simple topology function to distinguish between processes that can
  595. communicate through IPC mechanisms (e.g., shared memory) and other network
  596. mechanisms.
  597. @*/
  598. int PMI_Get_clique_ranks( int ranks[], int length )
  599. {
  600. char *env;
  601. if (pmi_debug)
  602. fprintf(stderr, "In: PMI_Get_clique_ranks\n");
  603. if (ranks == NULL)
  604. return PMI_ERR_INVALID_ARG;
  605. if (pmi_init == 0)
  606. return PMI_FAIL;
  607. /* Simple operation without srun */
  608. if ((pmi_jobid == 0) && (pmi_stepid == 0)) {
  609. if (length < 1)
  610. return PMI_ERR_INVALID_LENGTH;
  611. ranks[0] = 0;
  612. return PMI_SUCCESS;
  613. }
  614. env = getenv("SLURM_GTIDS");
  615. if (env) {
  616. int i = 0;
  617. char *tid, *tids, *last = NULL;
  618. tids = strdup(env);
  619. tid = strtok_r(tids, ",", &last);
  620. while (tid) {
  621. if (i >= length) {
  622. free(tids);
  623. return PMI_ERR_INVALID_LENGTH;
  624. }
  625. ranks[i++] = atoi(tid);
  626. tid = strtok_r(NULL, ",", &last);
  627. }
  628. free(tids);
  629. return PMI_SUCCESS;
  630. }
  631. return PMI_FAIL;
  632. }
  633. /*@
  634. PMI_Abort - abort the process group associated with this process
  635. Input Parameters:
  636. + exit_code - exit code to be returned by this process
  637. - error_msg - error message to be printed
  638. Return values:
  639. . none - this function should not return
  640. @*/
  641. int PMI_Abort(int exit_code, const char error_msg[])
  642. {
  643. if ((pmi_debug) || (error_msg != NULL)) {
  644. if (error_msg == NULL)
  645. error_msg = "NULL";
  646. fprintf(stderr, "In: PMI_Abort(%d, %s)\n", exit_code, error_msg);
  647. }
  648. if (pmi_init) {
  649. if ((pmi_jobid == 0) && (pmi_stepid == 0)) {
  650. /* Simple operation without srun */
  651. kill(0, SIGKILL);
  652. } else {
  653. slurm_kill_job_step((uint32_t) pmi_jobid,
  654. (uint32_t) pmi_stepid, SIGKILL);
  655. }
  656. }
  657. exit(exit_code);
  658. }
  659. /* PMI Keymap functions */
  660. /*@
  661. PMI_KVS_Get_my_name - obtain the name of the keyval space the local process
  662. group has access to
  663. Input Parameters:
  664. . length - length of the kvsname character array
  665. Output Parameters:
  666. . kvsname - a string that receives the keyval space name
  667. Return values:
  668. + PMI_SUCCESS - kvsname successfully obtained
  669. . PMI_ERR_INVALID_ARG - invalid argument
  670. . PMI_ERR_INVALID_LENGTH - invalid length argument
  671. - PMI_FAIL - unable to return the kvsname
  672. Notes:
  673. This function returns the name of the keyval space that this process and all
  674. other processes in the process group have access to. The output parameter,
  675. kvsname, must be at least as long as the value returned by
  676. 'PMI_KVS_Get_name_length_max()'.
  677. @*/
  678. int PMI_KVS_Get_my_name( char kvsname[], int length )
  679. {
  680. int size;
  681. if (pmi_debug)
  682. fprintf(stderr, "In: PMI_KVS_Get_my_name\n");
  683. if (kvsname == NULL)
  684. return PMI_ERR_INVALID_ARG;
  685. if (pmi_init == 0)
  686. return PMI_FAIL;
  687. size = snprintf(kvsname, length, "%ld.%ld", pmi_jobid, pmi_stepid);
  688. if (size >= length) /* truncated */
  689. return PMI_ERR_INVALID_LENGTH;
  690. pthread_mutex_lock(&kvs_mutex);
  691. _init_kvs(kvsname);
  692. pthread_mutex_unlock(&kvs_mutex);
  693. return PMI_SUCCESS;
  694. }
  695. static void _init_kvs( char kvsname[] )
  696. {
  697. int i;
  698. i = kvs_rec_cnt;
  699. kvs_rec_cnt++;
  700. kvs_recs = realloc(kvs_recs, (sizeof(struct kvs_rec) * kvs_rec_cnt));
  701. /* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */
  702. kvs_recs[i].kvs_name = malloc(PMI_MAX_KVSNAME_LEN);
  703. if (kvs_recs[i].kvs_name)
  704. strncpy(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN);
  705. kvs_recs[i].kvs_state = KVS_STATE_LOCAL;
  706. kvs_recs[i].kvs_cnt = 0;
  707. kvs_recs[i].kvs_inx = 0;
  708. kvs_recs[i].kvs_key_states = NULL;
  709. kvs_recs[i].kvs_keys = NULL;
  710. kvs_recs[i].kvs_values = NULL;
  711. }
  712. /*@
  713. PMI_KVS_Get_name_length_max - obtain the length necessary to store a kvsname
  714. Output Parameter:
  715. . length - maximum length required to hold a keyval space name
  716. Return values:
  717. + PMI_SUCCESS - length successfully set
  718. . PMI_ERR_INVALID_ARG - invalid argument
  719. - PMI_FAIL - unable to set the length
  720. Notes:
  721. This function returns the string length required to store a keyval space name.
  722. A routine is used rather than setting a maximum value in 'pmi.h' to allow
  723. different implementations of PMI to be used with the same executable. These
  724. different implementations may allow different maximum lengths; by using a
  725. routine here, we can interface with a variety of implementations of PMI.
  726. @*/
  727. int PMI_KVS_Get_name_length_max( int *length )
  728. {
  729. if (pmi_debug)
  730. fprintf(stderr, "In: PMI_KVS_Get_name_length_max\n");
  731. if (length == NULL)
  732. return PMI_ERR_INVALID_ARG;
  733. *length = PMI_MAX_KVSNAME_LEN;
  734. return PMI_SUCCESS;
  735. }
  736. /*@
  737. PMI_KVS_Get_key_length_max - obtain the length necessary to store a key
  738. Output Parameter:
  739. . length - maximum length required to hold a key string.
  740. Return values:
  741. + PMI_SUCCESS - length successfully set
  742. . PMI_ERR_INVALID_ARG - invalid argument
  743. - PMI_FAIL - unable to set the length
  744. Notes:
  745. This function returns the string length required to store a key.
  746. @*/
  747. int PMI_KVS_Get_key_length_max( int *length )
  748. {
  749. if (pmi_debug)
  750. fprintf(stderr, "In: PMI_KVS_Get_key_length_max\n");
  751. if (length == NULL)
  752. return PMI_ERR_INVALID_ARG;
  753. *length = PMI_MAX_KEY_LEN;
  754. return PMI_SUCCESS;
  755. }
  756. /*@
  757. PMI_KVS_Get_value_length_max - obtain the length necessary to store a value
  758. Output Parameter:
  759. . length - maximum length required to hold a keyval space value
  760. Return values:
  761. + PMI_SUCCESS - length successfully set
  762. . PMI_ERR_INVALID_ARG - invalid argument
  763. - PMI_FAIL - unable to set the length
  764. Notes:
  765. This function returns the string length required to store a value from a
  766. keyval space.
  767. @*/
  768. int PMI_KVS_Get_value_length_max( int *length )
  769. {
  770. if (pmi_debug)
  771. fprintf(stderr, "In: PMI_KVS_Get_value_length_max\n");
  772. if (length == NULL)
  773. return PMI_ERR_INVALID_ARG;
  774. *length = PMI_MAX_VAL_LEN;
  775. return PMI_SUCCESS;
  776. }
  777. /*@
  778. PMI_KVS_Create - create a new keyval space
  779. Input Parameter:
  780. . length - length of the kvsname character array
  781. Output Parameters:
  782. . kvsname - a string that receives the keyval space name
  783. Return values:
  784. + PMI_SUCCESS - keyval space successfully created
  785. . PMI_ERR_INVALID_ARG - invalid argument
  786. . PMI_ERR_INVALID_LENGTH - invalid length argument
  787. - PMI_FAIL - unable to create a new keyval space
  788. Notes:
  789. This function creates a new keyval space. Everyone in the same process group
  790. can access this keyval space by the name returned by this function. The
  791. function is not collective. Only one process calls this function. The output
  792. parameter, kvsname, must be at least as long as the value returned by
  793. 'PMI_KVS_Get_name_length_max()'.
  794. @*/
  795. int PMI_KVS_Create( char kvsname[], int length )
  796. {
  797. int size, rc;
  798. if (pmi_debug)
  799. fprintf(stderr, "In: PMI_KVS_Create\n");
  800. if (kvsname == NULL)
  801. return PMI_ERR_INVALID_ARG;
  802. if (pmi_init == 0)
  803. return PMI_FAIL;
  804. pthread_mutex_lock(&kvs_mutex);
  805. size = snprintf(kvsname, length, "%ld.%ld.%d.%d", pmi_jobid,
  806. pmi_stepid, pmi_rank, kvs_name_sequence);
  807. if (size >= length) /* truncated */
  808. rc = PMI_ERR_INVALID_LENGTH;
  809. else {
  810. kvs_name_sequence++;
  811. _init_kvs(kvsname);
  812. rc = PMI_SUCCESS;
  813. }
  814. pthread_mutex_unlock(&kvs_mutex);
  815. return rc;
  816. }
  817. /*@
  818. PMI_KVS_Destroy - destroy keyval space
  819. Input Parameters:
  820. . kvsname - keyval space name
  821. Return values:
  822. + PMI_SUCCESS - keyval space successfully destroyed
  823. . PMI_ERR_INVALID_ARG - invalid argument
  824. - PMI_FAIL - unable to destroy the keyval space
  825. Notes:
  826. This function destroys a keyval space created by 'PMI_KVS_Create()'.
  827. @*/
  828. int PMI_KVS_Destroy( const char kvsname[] )
  829. {
  830. int i, found = 0;
  831. if (pmi_debug)
  832. fprintf(stderr, "In: PMI_KVS_Destroy - NOT FULLY SUPPORTED\n");
  833. if (kvsname == NULL)
  834. return PMI_ERR_INVALID_ARG;
  835. pthread_mutex_lock(&kvs_mutex);
  836. for (i=0; i<kvs_rec_cnt; i++) {
  837. if (strncmp(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN))
  838. continue;
  839. kvs_recs[i].kvs_state = KVS_STATE_DEFUNCT;
  840. found = 1;
  841. break;
  842. }
  843. pthread_mutex_unlock(&kvs_mutex);
  844. if (found == 0)
  845. return PMI_ERR_INVALID_ARG;
  846. /* FIXME: We need to add mechanism to remove these keys from srun's master copy */
  847. return PMI_SUCCESS;
  848. }
  849. /*@
  850. PMI_KVS_Put - put a key/value pair in a keyval space
  851. Input Parameters:
  852. + kvsname - keyval space name
  853. . key - key
  854. - value - value
  855. Return values:
  856. + PMI_SUCCESS - keyval pair successfully put in keyval space
  857. . PMI_ERR_INVALID_KVS - invalid kvsname argument
  858. . PMI_ERR_INVALID_KEY - invalid key argument
  859. . PMI_ERR_INVALID_VAL - invalid val argument
  860. - PMI_FAIL - put failed
  861. Notes:
  862. This function puts the key/value pair in the specified keyval space. The
  863. value is not visible to other processes until 'PMI_KVS_Commit()' is called.
  864. The function may complete locally. After 'PMI_KVS_Commit()' is called, the
  865. value may be retrieved by calling 'PMI_KVS_Get()'. All keys put to a keyval
  866. space must be unique to the keyval space. You may not put more than once
  867. with the same key.
  868. @*/
  869. int PMI_KVS_Put( const char kvsname[], const char key[], const char value[])
  870. {
  871. if (pmi_debug)
  872. fprintf(stderr, "In: PMI_KVS_Put(%s:%s)\n", key, value);
  873. if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN))
  874. return PMI_ERR_INVALID_KVS;
  875. if ((key == NULL) || (strlen(key) >PMI_MAX_KEY_LEN))
  876. return PMI_ERR_INVALID_KEY;
  877. if ((value == NULL) || (strlen(value) > PMI_MAX_VAL_LEN))
  878. return PMI_ERR_INVALID_VAL;
  879. return _kvs_put(kvsname, key, value, 1);
  880. }
  881. static int _kvs_put( const char kvsname[], const char key[], const char value[],
  882. int local)
  883. {
  884. int i, j, rc;
  885. /* find the proper kvs record */
  886. pthread_mutex_lock(&kvs_mutex);
  887. for (i=0; i<kvs_rec_cnt; i++) {
  888. if (strncmp(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN))
  889. continue;
  890. if (pmi_kvs_no_dup_keys) {
  891. j = kvs_recs[i].kvs_cnt;
  892. goto no_dup;
  893. }
  894. /* search for duplicate key */
  895. for (j=0; j<kvs_recs[i].kvs_cnt; j++) {
  896. if (strncmp(kvs_recs[i].kvs_keys[j], key,
  897. PMI_MAX_KEY_LEN))
  898. continue;
  899. if (local)
  900. kvs_recs[i].kvs_key_states[j] = KVS_KEY_STATE_LOCAL;
  901. /* else leave unchanged */
  902. /* replace the existing value */
  903. /* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */
  904. if (kvs_recs[i].kvs_values[j] == NULL)
  905. kvs_recs[i].kvs_values[j] = malloc(PMI_MAX_VAL_LEN);
  906. if (kvs_recs[i].kvs_values[j] == NULL)
  907. rc = PMI_FAIL; /* malloc error */
  908. else {
  909. rc = PMI_SUCCESS;
  910. strncpy(kvs_recs[i].kvs_values[j], value,
  911. PMI_MAX_VAL_LEN);
  912. }
  913. goto fini;
  914. }
  915. no_dup:
  916. /* create new key */
  917. kvs_recs[i].kvs_cnt++;
  918. kvs_recs[i].kvs_key_states = realloc(kvs_recs[i].kvs_key_states,
  919. (sizeof (uint16_t) * kvs_recs[i].kvs_cnt));
  920. kvs_recs[i].kvs_values = realloc(kvs_recs[i].kvs_values,
  921. (sizeof (char *) * kvs_recs[i].kvs_cnt));
  922. kvs_recs[i].kvs_keys = realloc(kvs_recs[i].kvs_keys,
  923. (sizeof (char *) * kvs_recs[i].kvs_cnt));
  924. if ((kvs_recs[i].kvs_key_states == NULL)
  925. || (kvs_recs[i].kvs_values == NULL)
  926. || (kvs_recs[i].kvs_keys == NULL)) {
  927. rc = PMI_FAIL; /* malloc error */
  928. goto fini;
  929. }
  930. if (local)
  931. kvs_recs[i].kvs_key_states[j] = KVS_KEY_STATE_LOCAL;
  932. else
  933. kvs_recs[i].kvs_key_states[j] = KVS_KEY_STATE_GLOBAL;
  934. /* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */
  935. kvs_recs[i].kvs_values[j] = malloc(PMI_MAX_VAL_LEN);
  936. kvs_recs[i].kvs_keys[j] = malloc(PMI_MAX_KEY_LEN);
  937. if ((kvs_recs[i].kvs_values[j] == NULL)
  938. || (kvs_recs[i].kvs_keys[j] == NULL))
  939. rc = PMI_FAIL; /* malloc error */
  940. else {
  941. rc = PMI_SUCCESS;
  942. strncpy(kvs_recs[i].kvs_values[j], value,
  943. PMI_MAX_VAL_LEN);
  944. strncpy(kvs_recs[i].kvs_keys[j], key, PMI_MAX_KEY_LEN);
  945. }
  946. goto fini;
  947. }
  948. rc = PMI_ERR_INVALID_KVS;
  949. fini: pthread_mutex_unlock(&kvs_mutex);
  950. _kvs_dump();
  951. return rc;
  952. }
  953. /*@
  954. PMI_KVS_Commit - commit all previous puts to the keyval space
  955. Input Parameters:
  956. . kvsname - keyval space name
  957. Return values:
  958. + PMI_SUCCESS - commit succeeded
  959. . PMI_ERR_INVALID_ARG - invalid argument
  960. - PMI_FAIL - commit failed
  961. Notes:
  962. This function commits all previous puts since the last 'PMI_KVS_Commit()' into
  963. the specified keyval space. It is a process local operation.
  964. @*/
  965. int PMI_KVS_Commit( const char kvsname[] )
  966. {
  967. struct kvs_comm_set kvs_set;
  968. int i, j, rc = PMI_SUCCESS, local_pairs;
  969. if (pmi_debug)
  970. fprintf(stderr, "In: PMI_KVS_Commit\n");
  971. if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN))
  972. return PMI_ERR_INVALID_ARG;
  973. if (pmi_init == 0)
  974. return PMI_FAIL;
  975. /* Simple operation without srun (no-op) */
  976. if ((pmi_jobid == 0) && (pmi_stepid == 0))
  977. return rc;
  978. /* Pack records into RPC for sending to slurmd_step
  979. * NOTE: For performance reasons, we only send key-pairs
  980. * which have been locally set rather than the full key-pair
  981. * space. We do this by moving the local key-pairs to the
  982. * head of the list and sending the count of local entries
  983. * rather than the full set. */
  984. kvs_set.host_cnt = 1;
  985. kvs_set.kvs_host_ptr = malloc(sizeof(struct kvs_hosts));
  986. kvs_set.kvs_host_ptr->task_id = pmi_rank;
  987. kvs_set.kvs_host_ptr->port = 0;
  988. kvs_set.kvs_host_ptr->hostname = NULL;
  989. kvs_set.kvs_comm_recs = 0;
  990. kvs_set.kvs_comm_ptr = NULL;
  991. pthread_mutex_lock(&kvs_mutex);
  992. for (i=0; i<kvs_rec_cnt; i++) {
  993. if (kvs_recs[i].kvs_state == KVS_STATE_DEFUNCT)
  994. continue;
  995. local_pairs = 0;
  996. for (j=0; j<kvs_recs[i].kvs_cnt; j++) {
  997. if (kvs_recs[i].kvs_key_states[j] ==
  998. KVS_KEY_STATE_GLOBAL)
  999. continue;
  1000. if (local_pairs != j)
  1001. _kvs_swap(&kvs_recs[i], j, local_pairs);
  1002. local_pairs++;
  1003. }
  1004. if (local_pairs == 0)
  1005. continue;
  1006. kvs_set.kvs_comm_ptr = realloc(kvs_set.kvs_comm_ptr,
  1007. (sizeof(struct kvs_comm *) *
  1008. (kvs_set.kvs_comm_recs+1)));
  1009. kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs] =
  1010. malloc(sizeof(struct kvs_comm));
  1011. kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs]->kvs_name =
  1012. kvs_recs[i].kvs_name;
  1013. kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs]->kvs_cnt =
  1014. local_pairs;
  1015. kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs]->kvs_keys =
  1016. kvs_recs[i].kvs_keys;
  1017. kvs_set.kvs_comm_ptr[kvs_set.kvs_comm_recs]->kvs_values =
  1018. kvs_recs[i].kvs_values;
  1019. kvs_set.kvs_comm_recs++;
  1020. }
  1021. /* Send the RPC */
  1022. if (slurm_send_kvs_comm_set(&kvs_set, pmi_rank, pmi_size)
  1023. != SLURM_SUCCESS) {
  1024. rc = PMI_FAIL;
  1025. }
  1026. pthread_mutex_unlock(&kvs_mutex);
  1027. /* Free any temporary storage */
  1028. free(kvs_set.kvs_host_ptr);
  1029. for (i=0; i<kvs_set.kvs_comm_recs; i++)
  1030. free(kvs_set.kvs_comm_ptr[i]);
  1031. if (kvs_set.kvs_comm_ptr)
  1032. free(kvs_set.kvs_comm_ptr);
  1033. return rc;
  1034. }
  1035. static void _kvs_swap(struct kvs_rec *kvs_ptr, int inx1, int inx2)
  1036. {
  1037. char *tmp_char;
  1038. uint16_t tmp_16;
  1039. tmp_16 = kvs_ptr->kvs_key_states[inx1];
  1040. kvs_ptr->kvs_key_states[inx1] = kvs_ptr->kvs_key_states[inx2];
  1041. kvs_ptr->kvs_key_states[inx2] = tmp_16;
  1042. tmp_char = kvs_ptr->kvs_keys[inx1];
  1043. kvs_ptr->kvs_keys[inx1] = kvs_ptr->kvs_keys[inx2];
  1044. kvs_ptr->kvs_keys[inx2] = tmp_char;
  1045. tmp_char = kvs_ptr->kvs_values[inx1];
  1046. kvs_ptr->kvs_values[inx1] = kvs_ptr->kvs_values[inx2];
  1047. kvs_ptr->kvs_values[inx2] = tmp_char;
  1048. }
  1049. /*@
  1050. PMI_KVS_Get - get a key/value pair from a keyval space
  1051. Input Parameters:
  1052. + kvsname - keyval space name
  1053. . key - key
  1054. - length - length of value character array
  1055. Output Parameters:
  1056. . value - value
  1057. Return values:
  1058. + PMI_SUCCESS - get succeeded
  1059. . PMI_ERR_INVALID_KVS - invalid kvsname argument
  1060. . PMI_ERR_INVALID_KEY - invalid key argument
  1061. . PMI_ERR_INVALID_VAL - invalid val argument
  1062. . PMI_ERR_INVALID_LENGTH - invalid length argument
  1063. - PMI_FAIL - get failed
  1064. Notes:
  1065. This function gets the value of the specified key in the keyval space.
  1066. @*/
  1067. int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int length)
  1068. {
  1069. int i, j, rc;
  1070. if (pmi_debug)
  1071. fprintf(stderr, "In: PMI_KVS_Get(%s)\n", key);
  1072. if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN))
  1073. return PMI_ERR_INVALID_KVS;
  1074. if ((key == NULL) || (strlen(key) >PMI_MAX_KEY_LEN))
  1075. return PMI_ERR_INVALID_KEY;
  1076. if (value == NULL)
  1077. return PMI_ERR_INVALID_VAL;
  1078. /* find the proper kvs record */
  1079. pthread_mutex_lock(&kvs_mutex);
  1080. for (i=0; i<kvs_rec_cnt; i++) {
  1081. if (kvs_recs[i].kvs_state == KVS_STATE_DEFUNCT)
  1082. continue;
  1083. if (strncmp(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN))
  1084. continue;
  1085. for (j=0; j<kvs_recs[i].kvs_cnt; j++) {
  1086. if (strncmp(kvs_recs[i].kvs_keys[j], key,
  1087. PMI_MAX_KEY_LEN))
  1088. continue;
  1089. if (strlen(kvs_recs[i].kvs_values[j]) > (length-1))
  1090. rc = PMI_ERR_INVALID_LENGTH;
  1091. else {
  1092. strncpy(value, kvs_recs[i].kvs_values[j],
  1093. length);
  1094. rc = PMI_SUCCESS;
  1095. }
  1096. goto fini;
  1097. }
  1098. rc = PMI_ERR_INVALID_KEY;
  1099. goto fini;
  1100. }
  1101. rc = PMI_ERR_INVALID_KVS;
  1102. fini: pthread_mutex_unlock(&kvs_mutex);
  1103. return rc;
  1104. }
  1105. /*@
  1106. PMI_KVS_Iter_first - initialize the iterator and get the first value
  1107. Input Parameters:
  1108. + kvsname - keyval space name
  1109. . key_len - length of key character array
  1110. - val_len - length of val character array
  1111. Output Parameters:
  1112. + key - key
  1113. - value - value
  1114. Return values:
  1115. + PMI_SUCCESS - keyval pair successfully retrieved from the keyval space
  1116. . PMI_ERR_INVALID_KVS - invalid kvsname argument
  1117. . PMI_ERR_INVALID_KEY - invalid key argument
  1118. . PMI_ERR_INVALID_KEY_LENGTH - invalid key length argument
  1119. . PMI_ERR_INVALID_VAL - invalid val argument
  1120. . PMI_ERR_INVALID_VAL_LENGTH - invalid val length argument
  1121. - PMI_FAIL - failed to initialize the iterator and get the first keyval pair
  1122. Notes:
  1123. This function initializes the iterator for the specified keyval space and
  1124. retrieves the first key/val pair. The end of the keyval space is specified
  1125. by returning an empty key string. key and val must be at least as long as
  1126. the values returned by 'PMI_KVS_Get_key_length_max()' and
  1127. 'PMI_KVS_Get_value_length_max()'.
  1128. @*/
  1129. int PMI_KVS_Iter_first(const char kvsname[], char key[], int key_len, char val[], int val_len)
  1130. {
  1131. int i, rc;
  1132. if (pmi_debug)
  1133. fprintf(stderr, "In: PMI_KVS_Iter_first\n");
  1134. if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN))
  1135. return PMI_ERR_INVALID_KVS;
  1136. if (key == NULL)
  1137. return PMI_ERR_INVALID_KEY;
  1138. if (val == NULL)
  1139. return PMI_ERR_INVALID_VAL;
  1140. /* find the proper kvs record */
  1141. pthread_mutex_lock(&kvs_mutex);
  1142. for (i=0; i<kvs_rec_cnt; i++) {
  1143. if (kvs_recs[i].kvs_state == KVS_STATE_DEFUNCT)
  1144. continue;
  1145. if (strncmp(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN))
  1146. continue;
  1147. kvs_recs[i].kvs_inx = 0;
  1148. if (kvs_recs[i].kvs_inx >= kvs_recs[i].kvs_cnt) {
  1149. key[0] = '\0';
  1150. val[0] = '\0';
  1151. rc = PMI_SUCCESS;
  1152. } else if (strlen(kvs_recs[i].kvs_keys[kvs_recs[i].kvs_inx]) >
  1153. (key_len-1)) {
  1154. rc = PMI_ERR_INVALID_KEY_LENGTH;
  1155. } else if (strlen(kvs_recs[i].kvs_values[kvs_recs[i].kvs_inx]) >
  1156. (val_len-1)) {
  1157. rc = PMI_ERR_INVALID_VAL_LENGTH;
  1158. } else {
  1159. strncpy(key, kvs_recs[i].kvs_keys[kvs_recs[i].kvs_inx],
  1160. key_len);
  1161. strncpy(val,
  1162. kvs_recs[i].kvs_values[kvs_recs[i].kvs_inx],
  1163. val_len);
  1164. rc = PMI_SUCCESS;
  1165. }
  1166. goto fini;
  1167. }
  1168. rc = PMI_ERR_INVALID_KVS;
  1169. fini: pthread_mutex_unlock(&kvs_mutex);
  1170. return rc;
  1171. }
  1172. /*@
  1173. PMI_KVS_Iter_next - get the next keyval pair from the keyval space
  1174. Input Parameters:
  1175. + kvsname - keyval space name
  1176. . key_len - length of key character array
  1177. - val_len - length of val character array
  1178. Output Parameters:
  1179. + key - key
  1180. - value - value
  1181. Return values:
  1182. + PMI_SUCCESS - keyval pair successfully retrieved from the keyval space
  1183. . PMI_ERR_INVALID_KVS - invalid kvsname argument
  1184. . PMI_ERR_INVALID_KEY - invalid key argument
  1185. . PMI_ERR_INVALID_KEY_LENGTH - invalid key length argument
  1186. . PMI_ERR_INVALID_VAL - invalid val argument
  1187. . PMI_ERR_INVALID_VAL_LENGTH - invalid val length argument
  1188. - PMI_FAIL - failed to get the next keyval pair
  1189. Notes:
  1190. This function retrieves the next keyval pair from the specified keyval space.
  1191. 'PMI_KVS_Iter_first()' must have been previously called. The end of the keyval
  1192. space is specified by returning an empty key string. The output parameters,
  1193. key and val, must be at least as long as the values returned by
  1194. 'PMI_KVS_Get_key_length_max()' and 'PMI_KVS_Get_value_length_max()'.
  1195. @*/
  1196. int PMI_KVS_Iter_next(const char kvsname[], char key[], int key_len,
  1197. char val[], int val_len)
  1198. {
  1199. int i, rc;
  1200. if (pmi_debug)
  1201. fprintf(stderr, "In: PMI_KVS_Iter_next\n");
  1202. if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN))
  1203. return PMI_ERR_INVALID_KVS;
  1204. if (key == NULL)
  1205. return PMI_ERR_INVALID_KEY;
  1206. if (val == NULL)
  1207. return PMI_ERR_INVALID_VAL;
  1208. /* find the proper kvs record */
  1209. pthread_mutex_lock(&kvs_mutex);
  1210. for (i=0; i<kvs_rec_cnt; i++) {
  1211. if (kvs_recs[i].kvs_state == KVS_STATE_DEFUNCT)
  1212. continue;
  1213. if (strncmp(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN))
  1214. continue;
  1215. kvs_recs[i].kvs_inx++;
  1216. if (kvs_recs[i].kvs_inx >= kvs_recs[i].kvs_cnt) {
  1217. key[0] = '\0';
  1218. val[0] = '\0';
  1219. rc = PMI_SUCCESS;
  1220. } else if (strlen(kvs_recs[i].kvs_keys[kvs_recs[i].kvs_inx]) >
  1221. (key_len-1)) {
  1222. rc = PMI_ERR_INVALID_KEY_LENGTH;
  1223. } else if (strlen(kvs_recs[i].kvs_values[kvs_recs[i].kvs_inx]) >
  1224. (val_len-1)) {
  1225. rc = PMI_ERR_INVALID_VAL_LENGTH;
  1226. } else {
  1227. strncpy(key, kvs_recs[i].kvs_keys[kvs_recs[i].kvs_inx],
  1228. key_len);
  1229. strncpy(val,
  1230. kvs_recs[i].kvs_values[kvs_recs[i].kvs_inx],
  1231. val_len);
  1232. rc = PMI_SUCCESS;
  1233. }
  1234. goto fini;
  1235. }
  1236. rc = PMI_ERR_INVALID_KVS;
  1237. fini: pthread_mutex_unlock(&kvs_mutex);
  1238. return rc;
  1239. }
  1240. /* PMI Process Creation functions */
  1241. /*@
  1242. PMI_Spawn_multiple - spawn a new set of processes
  1243. Input Parameters:
  1244. + count - count of commands
  1245. . cmds - array of command strings
  1246. . argvs - array of argv arrays for each command string
  1247. . maxprocs - array of maximum processes to spawn for each command string
  1248. . info_keyval_sizes - array giving the number of elements in each of the
  1249. 'info_keyval_vectors'
  1250. . info_keyval_vectors - array of keyval vector arrays
  1251. . preput_keyval_size - Number of elements in 'preput_keyval_vector'
  1252. - preput_keyval_vector - array of keyvals to be pre-put in the spawned keyval space
  1253. Output Parameter:
  1254. . errors - array of errors for each command
  1255. Return values:
  1256. + PMI_SUCCESS - spawn successful
  1257. . PMI_ERR_INVALID_ARG - invalid argument
  1258. - PMI_FAIL - spawn failed
  1259. Notes:
  1260. This function spawns a set of processes into a new process group. The 'count'
  1261. field refers to the size of the array parameters - 'cmd', 'argvs', 'maxprocs',
  1262. 'info_keyval_sizes' and 'info_keyval_vectors'. The 'preput_keyval_size' refers
  1263. to the size of the 'preput_keyval_vector' array. The 'preput_keyval_vector'
  1264. contains keyval pairs that will be put in the keyval space of the newly
  1265. created process group before the processes are started. The 'maxprocs' array
  1266. specifies the desired number of processes to create for each 'cmd' string.
  1267. The actual number of processes may be less than the numbers specified in
  1268. maxprocs. The acceptable number of processes spawned may be controlled by
  1269. ``soft'' keyvals in the info arrays. The ``soft'' option is specified by
  1270. mpiexec in the MPI-2 standard. Environment variables may be passed to the
  1271. spawned processes through PMI implementation specific 'info_keyval' parameters.
  1272. @*/
  1273. int PMI_Spawn_multiple(int count,
  1274. const char * cmds[],
  1275. const char ** argvs[],
  1276. const int maxprocs[],
  1277. const int info_keyval_sizesp[],
  1278. const PMI_keyval_t * info_keyval_vectors[],
  1279. int preput_keyval_size,
  1280. const PMI_keyval_t preput_keyval_vector[],
  1281. int errors[])
  1282. {
  1283. if (pmi_debug)
  1284. fprintf(stderr, "In: PMI_Spawn_multiple - NOT SUPPORTED\n");
  1285. if (cmds == NULL)
  1286. return PMI_ERR_INVALID_ARG;
  1287. /* FIXME */
  1288. return PMI_FAIL;
  1289. }
  1290. /*@
  1291. PMI_Parse_option - create keyval structures from a single command line argument
  1292. Input Parameters:
  1293. + num_args - length of args array
  1294. - args - array of command line arguments starting with the argument to be parsed
  1295. Output Parameters:
  1296. + num_parsed - number of elements of the argument array parsed
  1297. . keyvalp - pointer to an array of keyvals
  1298. - size - size of the allocated array
  1299. Return values:
  1300. + PMI_SUCCESS - success
  1301. . PMI_ERR_INVALID_NUM_ARGS - invalid number of arguments
  1302. . PMI_ERR_INVALID_ARGS - invalid args argument
  1303. . PMI_ERR_INVALID_NUM_PARSED - invalid num_parsed length argument
  1304. . PMI_ERR_INVALID_KEYVALP - invalid keyvalp argument
  1305. . PMI_ERR_INVALID_SIZE - invalid size argument
  1306. - PMI_FAIL - fail
  1307. Notes:
  1308. This function removes one PMI specific argument from the command line and
  1309. creates the corresponding 'PMI_keyval_t' structure for it. It returns
  1310. an array and size to the caller. The array must be freed by 'PMI_Free_keyvals()'.
  1311. If the first element of the args array is not a PMI specific argument, the
  1312. function returns success and sets num_parsed to zero. If there are multiple PMI
  1313. specific arguments in the args array, this function may parse more than one
  1314. argument as long as the options are contiguous in the args array.
  1315. @*/
  1316. int PMI_Parse_option(int num_args, char *args[], int *num_parsed,
  1317. PMI_keyval_t **keyvalp,
  1318. int *size)
  1319. {
  1320. int i, n, s, len;
  1321. char *cp, *kp, *vp;
  1322. PMI_keyval_t *temp;
  1323. if (pmi_debug)
  1324. fprintf(stderr, "In: PMI_Parse_option - \n");
  1325. if (num_parsed == NULL)
  1326. return PMI_ERR_INVALID_NUM_PARSED;
  1327. if (keyvalp == NULL)
  1328. return PMI_ERR_INVALID_KEYVALP;
  1329. if (size == NULL)
  1330. return PMI_ERR_INVALID_SIZE;
  1331. i = 0;
  1332. n = 0;
  1333. s = 0;
  1334. cp = args[0];
  1335. temp = (PMI_keyval_t *) malloc(num_args * (sizeof (PMI_keyval_t)));
  1336. if (temp == NULL)
  1337. return PMI_FAIL;
  1338. cp = args[0];
  1339. while (i < num_args) {
  1340. while (*cp == ' ') cp++;
  1341. n++; // number of array elements processed
  1342. kp = cp; // keyword start here
  1343. while (*cp != ' ' && *cp != '=' && *cp != '\n' && *cp != '\0')
  1344. cp++;
  1345. if (*cp != '=') {
  1346. n++;
  1347. break;
  1348. }
  1349. len = cp - kp;
  1350. temp[s].key = (char *) malloc((len+1) * sizeof (char));
  1351. if (temp[s].key == NULL) {
  1352. temp[s].val = NULL;
  1353. PMI_Free_keyvals(temp, s);
  1354. return PMI_FAIL;
  1355. }
  1356. strncpy(temp[s].key, kp, len);
  1357. temp[s].key[len] = '\0';
  1358. if (!IsPmiKey(temp[s].key)) {
  1359. free(temp[s].key);
  1360. temp[s].key = NULL;
  1361. break;
  1362. }
  1363. vp = ++cp;
  1364. while (*cp != ' ' && *cp != '\n' && *cp != '\0')
  1365. cp++;
  1366. len = cp - vp + 1;
  1367. temp[s].val = (char *) malloc((len+1) * sizeof (char));
  1368. if (temp[s].val == NULL) {
  1369. PMI_Free_keyvals(temp, s+1);
  1370. return PMI_FAIL;
  1371. }
  1372. strncpy(temp[s].val, vp, len);
  1373. temp[s].val[len] = '\0';
  1374. s++;
  1375. i++; // try next args
  1376. cp = args[i];
  1377. }
  1378. if (s == 0) {
  1379. free(temp);
  1380. temp = NULL;
  1381. }
  1382. *keyvalp = temp;
  1383. *num_parsed = n;
  1384. *size = s;
  1385. return PMI_SUCCESS;
  1386. }
  1387. /*@
  1388. PMI_Args_to_keyval - create keyval structures from command line arguments
  1389. Input Parameters:
  1390. + argcp - pointer to argc
  1391. - argvp - pointer to argv
  1392. Output Parameters:
  1393. + keyvalp - pointer to an array of keyvals
  1394. - size - size of the allocated array
  1395. Return values:
  1396. + PMI_SUCCESS - success
  1397. . PMI_ERR_INVALID_ARG - invalid argument
  1398. - PMI_FAIL - fail
  1399. Notes:
  1400. This function removes PMI specific arguments from the command line and
  1401. creates the corresponding 'PMI_keyval_t' structures for them. It returns
  1402. an array and size to the caller that can then be passed to 'PMI_Spawn_multiple()'.
  1403. The array can be freed by 'PMI_Free_keyvals()'. The routine 'free()' should
  1404. not be used to free this array as there is no requirement that the array be
  1405. allocated with 'malloc()'.
  1406. @*/
  1407. /* Assume it is the standard c input argument format, i.e.,
  1408. argcp points to number of arguments
  1409. argvp points to the number of array of arguments, with argv[0] is the cmd
  1410. argv[1], argv[2]... are the keyword/argument pair.
  1411. */
  1412. int PMI_Args_to_keyval(int *argcp, char *((*argvp)[]), PMI_keyval_t **keyvalp,
  1413. int *size)
  1414. {
  1415. int i, j, cnt;
  1416. PMI_keyval_t *temp;
  1417. char **argv;
  1418. if (pmi_debug)
  1419. fprintf(stderr, "In: PMI_Args_to_keyval \n");
  1420. if ((keyvalp == NULL) || (size == NULL) ||
  1421. (argcp == NULL) || (argvp == NULL))
  1422. return PMI_ERR_INVALID_ARG;
  1423. cnt = *argcp;
  1424. argv = *argvp;
  1425. if (cnt == 0)
  1426. return PMI_ERR_INVALID_ARG;
  1427. temp = (PMI_keyval_t *) malloc(cnt * (sizeof (PMI_keyval_t)));
  1428. if (temp == NULL)
  1429. return PMI_FAIL;
  1430. j = 0;
  1431. i = 0;
  1432. if (argv[i][0] != '-') {
  1433. temp[j].val = (char *) malloc((strlen(argv[i])+1) * sizeof (char));
  1434. if (temp[j].val == NULL) {
  1435. temp[j].key = NULL;
  1436. PMI_Free_keyvals(temp, j);
  1437. return PMI_FAIL;
  1438. }
  1439. strcpy(temp[j].val, argv[i]);
  1440. temp[i].key=NULL;
  1441. --cnt;
  1442. ++j;
  1443. ++i;
  1444. }
  1445. while (cnt) {
  1446. if (argv[i][0] == '-') {
  1447. temp[j].key = (char *) malloc((strlen(argv[i])+1) *
  1448. sizeof (char));
  1449. if (temp[j].key == NULL) {
  1450. temp[j].val = NULL;
  1451. PMI_Free_keyvals(temp, j);
  1452. return PMI_FAIL;
  1453. }
  1454. strcpy(temp[j].key, argv[i]);
  1455. ++i;
  1456. --cnt;
  1457. if ((cnt) && (argv[i][0] != '-')){
  1458. temp[j].val = (char *) malloc(
  1459. (strlen(argv[i])+1) *
  1460. sizeof (char));
  1461. if (temp[j].val == NULL) {
  1462. PMI_Free_keyvals(temp, j+1);
  1463. return PMI_FAIL;
  1464. }
  1465. strcpy(temp[j].val, argv[i]);
  1466. i++;
  1467. --cnt;
  1468. } else {
  1469. temp[j].val = NULL;
  1470. }
  1471. j++;
  1472. } else {
  1473. PMI_Free_keyvals(temp, j);
  1474. return PMI_ERR_INVALID_ARG;
  1475. }
  1476. }
  1477. *size = j;
  1478. *keyvalp = temp;
  1479. return PMI_SUCCESS;
  1480. }
  1481. /*@
  1482. PMI_Free_keyvals - free the keyval structures created by PMI_Args_to_keyval
  1483. Input Parameters:
  1484. + keyvalp - array of keyvals
  1485. - size - size of the array
  1486. Return values:
  1487. + PMI_SUCCESS - success
  1488. . PMI_ERR_INVALID_ARG - invalid argument
  1489. - PMI_FAIL - fail
  1490. Notes:
  1491. This function frees the data returned by 'PMI_Args_to_keyval' and 'PMI_Parse_option'.
  1492. Using this routine instead of 'free' allows the PMI package to track
  1493. allocation of storage or to use internal storage as it sees fit.
  1494. @*/
  1495. int PMI_Free_keyvals(PMI_keyval_t keyvalp[], int size)
  1496. {
  1497. int i;
  1498. if (pmi_debug)
  1499. fprintf(stderr, "In: PMI_Free_keyvals \n");
  1500. if (((keyvalp == NULL) && size) || (size < 0))
  1501. return PMI_ERR_INVALID_ARG;
  1502. if (size == 0) {
  1503. if (keyvalp != NULL)
  1504. free(keyvalp);
  1505. return PMI_SUCCESS;
  1506. }
  1507. for (i=0; i<size; i++) {
  1508. if ((keyvalp[i].key) != NULL)
  1509. free(keyvalp[i].key);
  1510. if ((keyvalp[i].val) != NULL)
  1511. free(keyvalp[i].val);
  1512. }
  1513. free(keyvalp);
  1514. return PMI_SUCCESS;
  1515. }
  1516. /*@
  1517. PMI_Get_options - get a string of command line argument descriptions that may be printed
  1518. to the user
  1519. Input Parameters:
  1520. . length - length of str
  1521. Output Parameters:
  1522. + str - description string
  1523. - length - length of string or necessary length if input is not large enough
  1524. Return values:
  1525. + PMI_SUCCESS - success
  1526. . PMI_ERR_INVALID_ARG - invalid argument
  1527. . PMI_ERR_INVALID_LENGTH - invalid length argument
  1528. . PMI_ERR_NOMEM - input length too small
  1529. - PMI_FAIL - fail
  1530. Notes:
  1531. This function returns the command line options specific to the pmi implementation
  1532. @*/
  1533. int PMI_Get_options(char *str, int *length)
  1534. {
  1535. int optlen;
  1536. if (pmi_debug)
  1537. fprintf(stderr, "In: PMI_Get_options \n");
  1538. if ((str == NULL) || (length == NULL))
  1539. return PMI_ERR_INVALID_ARG;
  1540. optlen = strlen(pmi_opt_str);
  1541. if (*length <= optlen) {
  1542. strncpy(str, pmi_opt_str, *length-1);
  1543. str[*length-1] = '\0';
  1544. return PMI_ERR_NOMEM;
  1545. }
  1546. strcpy(str, pmi_opt_str);
  1547. return PMI_SUCCESS;
  1548. }
  1549. static int IsPmiKey(char * key) {
  1550. char strh[5];
  1551. if (pmi_debug)
  1552. fprintf(stderr, "In: IsPmiKey \n");
  1553. strncpy(strh, key, 4);
  1554. strh[4]='\0';
  1555. if (!strcmp(strh, "PMI_") && (strlen(key) > 4)) {
  1556. return 1;
  1557. }
  1558. /* add code to test special key if needed */
  1559. return 0;
  1560. }
  1561. inline static void _kvs_dump(void)
  1562. {
  1563. #if _DEBUG
  1564. int i, j;
  1565. for (i=0; i<kvs_rec_cnt; i++) {
  1566. info("name=%s state=%u cnt=%u inx=%u",
  1567. kvs_recs[i].kvs_name, kvs_recs[i].kvs_state,
  1568. kvs_recs[i].kvs_cnt, kvs_recs[i].kvs_inx);
  1569. for (j=0; j<kvs_recs[i].kvs_cnt; j++) {
  1570. info(" state=%u key=%s value=%s",
  1571. kvs_recs[i].kvs_key_states[j],
  1572. kvs_recs[i].kvs_keys[j],
  1573. kvs_recs[i].kvs_values[j]);
  1574. }
  1575. }
  1576. #endif
  1577. }