/src/modules/qcmd.c

https://code.google.com/ · C · 545 lines · 364 code · 66 blank · 115 comment · 90 complexity · 6f693675132fec5db51e2b8de88db2d1 MD5 · raw file

  1. /*****************************************************************************\
  2. * $Id$
  3. *****************************************************************************
  4. * Copyright (C) 2001-2006 The Regents of the University of California.
  5. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  6. * Written by Jim Garlick <garlick@llnl.gov>.
  7. * UCRL-CODE-2003-005.
  8. *
  9. * This file is part of Pdsh, a parallel remote shell program.
  10. * For details, see <http://www.llnl.gov/linux/pdsh/>.
  11. *
  12. * Pdsh is free software; you can redistribute it and/or modify it under
  13. * the terms of the GNU General Public License as published by the Free
  14. * Software Foundation; either version 2 of the License, or (at your option)
  15. * any later version.
  16. *
  17. * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY
  18. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  19. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  20. * details.
  21. *
  22. * You should have received a copy of the GNU General Public License along
  23. * with Pdsh; if not, write to the Free Software Foundation, Inc.,
  24. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  25. \*****************************************************************************/
  26. /*
  27. * This code is based on the BSD rcmd.c with MT safety added, and the
  28. * interface changed. Original UC regents header included below.
  29. */
  30. /*
  31. * Copyright (c) 1983, 1993, 1994
  32. * The Regents of the University of California. All rights reserved.
  33. *
  34. * Redistribution and use in source and binary forms, with or without
  35. * modification, are permitted provided that the following conditions
  36. * are met:
  37. * 1. Redistributions of source code must retain the above copyright
  38. * notice, this list of conditions and the following disclaimer.
  39. * 2. Redistributions in binary form must reproduce the above copyright
  40. * notice, this list of conditions and the following disclaimer in the
  41. * documentation and/or other materials provided with the distribution.
  42. * 3. All advertising materials mentioning features or use of this software
  43. * must display the following acknowledgement:
  44. * This product includes software developed by the University of
  45. * California, Berkeley and its contributors.
  46. * 4. Neither the name of the University nor the names of its contributors
  47. * may be used to endorse or promote products derived from this software
  48. * without specific prior written permission.
  49. *
  50. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  51. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  52. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  53. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  54. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  55. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  56. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  57. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  58. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  59. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  60. * SUCH DAMAGE.
  61. */
  62. #if defined(LIBC_SCCS) && !defined(lint)
  63. static char sccsid[] = "@(#)rcmd.c 8.3 (Berkeley) 3/26/94";
  64. #endif /* LIBC_SCCS and not lint */
  65. #if HAVE_CONFIG_H
  66. #include "config.h"
  67. #endif
  68. #include <sys/param.h>
  69. #include <sys/types.h>
  70. #include <sys/time.h>
  71. #include <sys/socket.h>
  72. #include <sys/stat.h>
  73. #if HAVE_PTHREAD_H
  74. #include <pthread.h>
  75. #endif
  76. #include <netinet/in.h>
  77. #include <arpa/inet.h>
  78. #include <signal.h>
  79. #if HAVE_FCNTL_H
  80. #include <fcntl.h>
  81. #endif
  82. #include <netdb.h>
  83. #if HAVE_UNISTD_H
  84. #include <unistd.h>
  85. #endif
  86. #include <pwd.h>
  87. #include <errno.h>
  88. #include <ctype.h>
  89. #include <string.h>
  90. #include <stdio.h>
  91. #include <string.h>
  92. #include <stdlib.h>
  93. #include <elan3/elanvp.h>
  94. #include "src/common/xmalloc.h"
  95. #include "src/common/xstring.h"
  96. #include "src/common/list.h"
  97. #include "src/common/err.h"
  98. #include "src/common/macros.h" /* LINEBUFSIZE */
  99. #include "src/common/xpoll.h"
  100. #include "src/pdsh/mod.h"
  101. #include "src/pdsh/privsep.h"
  102. #include "src/qsnet/qswutil.h"
  103. #define QSHELL_PORT 523
  104. #if HAVE_GETHOSTBYNAME_R
  105. #define HBUF_LEN 1024
  106. #endif
  107. #if STATIC_MODULES
  108. # define pdsh_module_info qcmd_module_info
  109. # define pdsh_module_priority qcmd_module_priority
  110. #endif
  /*
   * Module-wide state shared by the option handlers, qcmd_postop(),
   * qcmd_init() and the per-connection code, followed by the forward
   * declarations of the functions referenced by the tables below.
   */
  111. int pdsh_module_priority = DEFAULT_MODULE_PRIORITY;
  112. extern char **environ; /* forwarded to the remote side by _qcmd_send_extra_args() */
  113. static bool dist_set = false; /* set once a valid -m argument has been parsed */
  114. static bool cyclic = false; /* true for "-m cyclic", false for "-m block" */
  115. static int nprocs = 1; /* value given with -n; multiplied by the node count in qcmd_init() */
  116. static unsigned int railmask = 1; /* value given with -r */
  117. static bool railmask_set = false; /* set once -r has been parsed */
  118. static char cwd[MAXPATHLEN + 1]; /* working directory cached by qcmd_init() */
  119. static qsw_info_t qinfo; /* Elan info (prgnum, nnodes, nprocs, ...) filled in by qcmd_init() */
  120. static ELAN_CAPABILITY cap; /* Elan capability initialized by qcmd_init() */
  121. static int qcmd_postop(opt_t *opt);
  122. static int qcmd_opt_m(opt_t *, int, char *);
  123. static int qcmd_opt_n(opt_t *, int, char *);
  124. static int qcmd_opt_r(opt_t *, int, char *);
  125. static int qcmd_init(opt_t *);
  126. static int qcmd_signal(int, void *, int);
  127. static int qcmd(char *, char *, char *, char *, char *, int, int *, void **);
  128. /*
  129. * Export generic pdsh module operations
   *
   * Only the last slot is populated (with qcmd_postop); the first three
   * are NULL. Judging by the cast types these are the init, exit,
   * read-wcoll and post-option hooks, in that order -- confirm against
   * the struct declaration.
  130. */
  131. struct pdsh_module_operations qcmd_module_ops = {
  132. (ModInitF) NULL,
  133. (ModExitF) NULL,
  134. (ModReadWcollF) NULL,
  135. (ModPostOpF) qcmd_postop
  136. };
  137. /*
  138. * Export rcmd module operations
   *
   * Three positional slots: qcmd_init (one-time setup), qcmd_signal
   * (sends a signal number over the backchannel descriptor) and qcmd
   * (opens one connection).
  139. */
  140. struct pdsh_rcmd_operations qcmd_rcmd_ops = {
  141. (RcmdInitF) qcmd_init,
  142. (RcmdSigF) qcmd_signal,
  143. (RcmdF) qcmd,
  144. };
  145. /*
  146. * Export module options
   *
   * Each entry gives the option letter, the argument description, the
   * help text, a DSH flag and the handler function:
   *   -m block|cyclic  -> qcmd_opt_m
   *   -n n             -> qcmd_opt_n
   *   -r railmask      -> qcmd_opt_r
   * The table is terminated by PDSH_OPT_TABLE_END.
  147. */
  148. struct pdsh_module_option qcmd_module_options[] =
  149. { { 'm', "block|cyclic", "(qshell) control assignment of procs to nodes",
  150. DSH, (optFunc) qcmd_opt_m },
  151. { 'n', "n", "(qshell) set number of tasks per node",
  152. DSH, (optFunc) qcmd_opt_n },
  153. { 'r', "railmask", "(qshell) set railmask for job on multirail system",
  154. DSH, (optFunc) qcmd_opt_r },
  155. PDSH_OPT_TABLE_END
  156. };
  157. /*
  158. * Qcmd module info
   *
   * Ties together the operation tables and the option table defined
   * above. The initializer is positional.
   * NOTE(review): "rcmd" and "qsh" look like the module type and the
   * module name (qcmd_postop() compares opt->rcmd_name against "qsh");
   * confirm the field order against the struct pdsh_module declaration.
  159. */
  160. struct pdsh_module pdsh_module_info = {
  161. "rcmd",
  162. "qsh",
  163. "Jim Garlick <garlick@llnl.gov>",
  164. "Run MPI jobs over QsNet",
  165. DSH,
  166. &qcmd_module_ops,
  167. &qcmd_rcmd_ops,
  168. &qcmd_module_options[0],
  169. };
  170. static int
  171. qcmd_opt_m(opt_t *pdsh_opts, int opt, char *arg)
  172. {
  173. if (strcmp(arg, "block") == 0)
  174. cyclic = false;
  175. else if (strcmp(arg, "cyclic") == 0)
  176. cyclic = true;
  177. else
  178. return -1;
  179. dist_set = true;
  180. return 0;
  181. }
  182. static int
  183. qcmd_opt_n(opt_t *pdsh_opts, int opt, char *arg)
  184. {
  185. nprocs = atoi(arg);
  186. return 0;
  187. }
  188. static int
  189. qcmd_opt_r(opt_t *pdsh_opts, int opt, char *arg)
  190. {
  191. char *p = NULL;
  192. long int val = strtol (arg, &p, 0);
  193. if (*p != '\0')
  194. errx ("%p: Invalid value for railmask: \"%s\"\n", arg);
  195. railmask = (unsigned int) val;
  196. railmask_set = true;
  197. return (0);
  198. }
  /*
   * Use rcmd backchannel to propagate signals.
   *   efd (IN)      file descriptor of the connected socket (-1 if not used)
   *   arg (IN)      not used by this module
   *   signum (IN)   signal number to send
   *   int (RETURN)  always 0
   *
   * The signal number is sent as a single byte. The write is best effort:
   * the descriptor is switched to non-blocking mode first and the result
   * of the write is deliberately not checked.
   */
  static int qcmd_signal(int efd, void *arg, int signum)
  {
      char c = (char) signum;
      int flags;

      if (efd < 0)
          return 0;

      /*
       * Add O_NONBLOCK to the existing status flags instead of replacing
       * them, so that any other flags set on the descriptor survive.
       */
      flags = fcntl(efd, F_GETFL, 0);
      if (flags < 0 || fcntl(efd, F_SETFL, flags | O_NONBLOCK) < 0)
          err("%p: fcntl: %m\n");

      /* just take our best shot */
      (void) write(efd, &c, 1);

      return 0;
  }
  216. static int qcmd_postop(opt_t *opt)
  217. {
  218. int errors = 0;
  219. if (strcmp(opt->rcmd_name, "qsh") == 0) {
  220. if (opt->fanout != DFLT_FANOUT && opt->wcoll != NULL) {
  221. if (opt->fanout != hostlist_count(opt->wcoll)) {
  222. err("%p: fanout must = target node list length \"-R qsh\"\n");
  223. errors++;
  224. }
  225. }
  226. if (nprocs <= 0) {
  227. err("%p: -n should be > 0\n");
  228. errors++;
  229. }
  230. if ((railmask == 0) || (railmask > QSW_RAILMASK_MAX)) {
  231. err ("%p: qcmd: invalid value %d for -r (railmask)\n", railmask);
  232. errors++;
  233. }
  234. } else {
  235. if (nprocs != 1) {
  236. err("%p: -n can only be specified with \"-R qsh\"\n");
  237. errors++;
  238. }
  239. if (dist_set) {
  240. err("%p: -m may only be specified with \"-R qsh\"\n");
  241. errors++;
  242. }
  243. if (railmask_set) {
  244. err("%p: qcmd: -r may only be specified with -R mqsh\n");
  245. errors++;
  246. }
  247. }
  248. return errors;
  249. }
  250. static void
  251. _qcmd_opt_init(opt_t *opt)
  252. {
  253. if (opt->fanout == DFLT_FANOUT && opt->wcoll != NULL)
  254. opt->fanout = hostlist_count(opt->wcoll);
  255. else {
  256. err("%p: qcmd: Unable to set appropriate fanout\n");
  257. exit(1);
  258. }
  259. opt->labels = false;
  260. opt->kill_on_fail = true;
  261. if (opt->dshpath != NULL)
  262. Free((void **) &opt->dshpath);
  263. }
  264. /*
  265. * Intialize elan capability and info structures that will be used when
  266. * running the job.
  267. * wcoll (IN) list of nodes
  268. */
  269. static int qcmd_init(opt_t * opt)
  270. {
  271. int totprocs = nprocs * hostlist_count(opt->wcoll);
  272. if (qsw_init() < 0)
  273. exit(1);
  274. /*
  275. * Verify constraints for running Elan jobs
  276. * and initialize options.
  277. */
  278. _qcmd_opt_init(opt);
  279. if (getcwd(cwd, sizeof(cwd)) == NULL) /* cache working directory */
  280. errx("%p: getcwd failed: %m\n");
  281. /* initialize Elan capability structure. */
  282. if (qsw_init_capability(&cap, totprocs, opt->wcoll, cyclic, railmask) < 0)
  283. errx("%p: failed to initialize Elan capability\n");
  284. /* initialize elan info structure */
  285. qinfo.prgnum = qsw_get_prgnum(); /* call after qsw_init_capability */
  286. qinfo.nnodes = hostlist_count(opt->wcoll);
  287. qinfo.nprocs = totprocs;
  288. qinfo.nodeid = qinfo.procid = qinfo.rank = 0;
  289. qsw_fini();
  290. return 0;
  291. }
  292. /*
  293. * Send extra arguments to qshell server
  294. * s (IN) socket
  295. * nodeid (IN) node index for this connection
  296. */
  297. static int _qcmd_send_extra_args(int s, int nodeid)
  298. {
  299. char **ep;
  300. char tmpstr[1024];
  301. int count = 0;
  302. int i;
  303. /* send current working dir */
  304. (void) write(s, cwd, strlen(cwd) + 1);
  305. /* send environment (count followed by variables, each \0-term) */
  306. for (ep = environ; *ep != NULL; ep++)
  307. count++;
  308. snprintf(tmpstr, sizeof(tmpstr), "%d", count);
  309. (void) write(s, tmpstr, strlen(tmpstr) + 1);
  310. for (ep = environ; *ep != NULL; ep++)
  311. (void) write(s, *ep, strlen(*ep) + 1);
  312. /* send elan capability */
  313. if (qsw_encode_cap(tmpstr, sizeof(tmpstr), &cap) < 0)
  314. return -1;
  315. (void) write(s, tmpstr, strlen(tmpstr) + 1);
  316. for (i = 0; i < qsw_cap_bitmap_count(); i += 16) {
  317. if (qsw_encode_cap_bitmap(tmpstr, sizeof(tmpstr), &cap, i) < 0)
  318. return -1;
  319. (void) write(s, tmpstr, strlen(tmpstr) + 1);
  320. }
  321. /* send elan info */
  322. qinfo.nodeid = qinfo.rank = qinfo.procid = nodeid;
  323. if (qsw_encode_info(tmpstr, sizeof(tmpstr), &qinfo) < 0)
  324. return -1;
  325. (void) write(s, tmpstr, strlen(tmpstr) + 1);
  326. return 0;
  327. }
  328. /*
  329. * Derived from the rcmd() libc call, with modified interface.
  330. * This version is MT-safe. Errors are displayed in pdsh-compat format.
  331. * Connection can time out.
  332. * ahost (IN) target hostname
  333. * locuser (IN) local username
  334. * remuser (IN) remote username
  335. * cmd (IN) remote command to execute under shell
  336. * nodeid (IN) node index for this connection
  337. * fd2p (IN) if non NULL, return stderr file descriptor here
  338. * int (RETURN) -1 on error, socket for I/O on success
  339. */
  340. static int
  341. qcmd(char *ahost, char *addr, char *locuser, char *remuser, char *cmd,
  342. int nodeid, int *fd2p, void **arg)
  343. {
  344. struct sockaddr_in sin, from;
  345. sigset_t oldset, blockme;
  346. pid_t pid;
  347. int s, lport, timo, rv;
  348. char c;
  349. struct xpollfd xpfds[2];
  350. pid = getpid();
  351. sigemptyset(&blockme);
  352. sigaddset(&blockme, SIGURG);
  353. pthread_sigmask(SIG_BLOCK, &blockme, &oldset);
  354. for (timo = 1, lport = IPPORT_RESERVED - 1;;) {
  355. s = privsep_rresvport(&lport);
  356. if (s < 0) {
  357. if (errno == EAGAIN)
  358. err("%p: %S: qcmd: socket: all ports in use\n", ahost);
  359. else
  360. err("%p: %S: qcmd: socket: %m\n", ahost);
  361. pthread_sigmask(SIG_SETMASK, &oldset, NULL);
  362. return (-1);
  363. }
  364. fcntl(s, F_SETOWN, pid);
  365. sin.sin_family = AF_INET;
  366. memcpy(&sin.sin_addr, addr, IP_ADDR_LEN);
  367. sin.sin_port = htons(QSHELL_PORT);
  368. rv = connect(s, (struct sockaddr *) &sin, sizeof(sin));
  369. if (rv >= 0)
  370. break;
  371. (void) close(s);
  372. if (errno == EADDRINUSE) {
  373. lport--;
  374. continue;
  375. }
  376. if (errno == ECONNREFUSED && timo <= 16) {
  377. (void) sleep(timo);
  378. timo *= 2;
  379. continue;
  380. }
  381. if (errno == EINTR)
  382. err("%p: %S: connect: timed out\n", ahost);
  383. else
  384. err("%p: %S: connect: %m\n", ahost);
  385. pthread_sigmask(SIG_SETMASK, &oldset, NULL);
  386. return (-1);
  387. }
  388. lport--;
  389. if (fd2p == 0) {
  390. write(s, "", 1);
  391. lport = 0;
  392. } else {
  393. char num[8];
  394. int s2 = privsep_rresvport(&lport), s3;
  395. socklen_t len = sizeof(from); /* arg to accept */
  396. if (s2 < 0)
  397. goto bad;
  398. listen(s2, 1);
  399. (void) snprintf(num, sizeof(num), "%d", lport);
  400. if (write(s, num, strlen(num) + 1) != strlen(num) + 1) {
  401. err("%p: %S: qcmd: write (setting up stderr): %m\n", ahost);
  402. (void) close(s2);
  403. goto bad;
  404. }
  405. errno = 0;
  406. xpfds[0].fd = s;
  407. xpfds[1].fd = s2;
  408. xpfds[0].events = xpfds[1].events = XPOLLREAD;
  409. if (((rv = xpoll(xpfds, 2, -1)) < 0) || rv != 1 || (xpfds[0].revents > 0)) {
  410. if (errno != 0)
  411. err("%p: %S: qcmd: xpoll (setting up stderr): %m\n", ahost);
  412. else
  413. err("%p: %S: qcmd: xpoll: protocol failure in circuit setup\n", ahost);
  414. (void) close(s2);
  415. goto bad;
  416. }
  417. s3 = accept(s2, (struct sockaddr *) &from, &len);
  418. (void) close(s2);
  419. if (s3 < 0) {
  420. err("%p: %S: qcmd: accept: %m\n", ahost);
  421. lport = 0;
  422. goto bad;
  423. }
  424. *fd2p = s3;
  425. from.sin_port = ntohs((u_short) from.sin_port);
  426. if (from.sin_family != AF_INET ||
  427. from.sin_port >= IPPORT_RESERVED ||
  428. from.sin_port < IPPORT_RESERVED / 2) {
  429. err("%p: %S: socket: protocol failure in circuit setup\n",
  430. ahost);
  431. goto bad2;
  432. }
  433. }
  434. (void) write(s, locuser, strlen(locuser) + 1);
  435. (void) write(s, remuser, strlen(remuser) + 1);
  436. (void) write(s, cmd, strlen(cmd) + 1);
  437. if (_qcmd_send_extra_args(s, nodeid) < 0)
  438. goto bad2;
  439. rv = read(s, &c, 1);
  440. if (rv < 0) {
  441. if (errno == EINTR)
  442. err("%p: %S: read: protocol failure: %s\n",
  443. ahost, "timed out");
  444. else
  445. err("%p: %S: read: protocol failure: %m\n", ahost);
  446. goto bad2;
  447. } else if (rv != 1) {
  448. err("%p: %S: read: protocol failure: %s\n",
  449. ahost, "invalid response");
  450. goto bad2;
  451. }
  452. if (c != 0) {
  453. /* retrieve error string from remote server */
  454. char tmpbuf[LINEBUFSIZE];
  455. char *p = tmpbuf;
  456. while (read(s, &c, 1) == 1) {
  457. *p++ = c;
  458. if (c == '\n')
  459. break;
  460. }
  461. if (c != '\n')
  462. *p++ = '\n';
  463. *p++ = '\0';
  464. err("%S: %s", ahost, tmpbuf);
  465. goto bad2;
  466. }
  467. pthread_sigmask(SIG_SETMASK, &oldset, NULL);
  468. return (s);
  469. bad2:
  470. if (lport)
  471. (void) close(*fd2p);
  472. bad:
  473. (void) close(s);
  474. pthread_sigmask(SIG_SETMASK, &oldset, NULL);
  475. return (-1);
  476. }
  477. /*
  478. * vi:tabstop=4 shiftwidth=4 expandtab
  479. */