/src/modules/mqcmd.c

https://code.google.com/ · C · 848 lines · 518 code · 121 blank · 209 comment · 117 complexity · aec8f2b90f0f0a9a37aef28b53991dd3 MD5 · raw file

  1. /*****************************************************************************\
  2. * $Id$
  3. *****************************************************************************
  4. * Copyright (C) 2001-2006 The Regents of the University of California.
  5. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  6. * Written by Jim Garlick <garlick@llnl.gov>.
  7. * UCRL-CODE-2003-005.
  8. *
  9. * This file is part of Pdsh, a parallel remote shell program.
  10. * For details, see <http://www.llnl.gov/linux/pdsh/>.
  11. *
  12. * Pdsh is free software; you can redistribute it and/or modify it under
  13. * the terms of the GNU General Public License as published by the Free
  14. * Software Foundation; either version 2 of the License, or (at your option)
  15. * any later version.
  16. *
  17. * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY
  18. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  19. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  20. * details.
  21. *
  22. * You should have received a copy of the GNU General Public License along
  23. * with Pdsh; if not, write to the Free Software Foundation, Inc.,
  24. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  25. \*****************************************************************************/
  26. /*
  27. * Started with BSD rcmd.c which is:
  28. *
  29. * Copyright (c) 1983, 1993, 1994, 2003
  30. * The Regents of the University of California. All rights reserved.
  31. *
  32. * Redistribution and use in source and binary forms, with or without
  33. * modification, are permitted provided that the following conditions
  34. * are met:
  35. * 1. Redistributions of source code must retain the above copyright
  36. * notice, this list of conditions and the following disclaimer.
  37. *
  38. * 2. Redistributions in binary form must reproduce the above copyright
  39. * notice, this list of conditions and the following disclaimer in the
  40. * documentation and/or other materials provided with the distribution.
  41. *
  42. * 3. All advertising materials mentioning features or use of this software
  43. * must display the following acknowledgement:
  44. * This product includes software developed by the University of
  45. * California, Berkeley and its contributors.
  46. *
  47. * 4. Neither the name of the University nor the names of its contributors
  48. * may be used to endorse or promote products derived from this software
  49. * without specific prior written permission.
  50. *
  51. * 5. This is free software; you can redistribute it and/or modify it
  52. * under the terms of the GNU General Public License as published
  53. * by the Free Software Foundation; either version 2 of the
  54. * License, or (at your option) any later version.
  55. *
  56. * 6. This is distributed in the hope that it will be useful, but
  57. * WITHOUT ANY WARRANTY; without even the implied warranty of
  58. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  59. * GNU General Public License for more details.
  60. *
  61. * 7. You should have received a copy of the GNU General Public License;
  62. * if not, write to the Free Software Foundation, Inc., 59 Temple
  63. * Place, Suite 330, Boston, MA 02111-1307 USA.
  64. *
  65. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  66. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  69. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  70. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  71. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  72. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  73. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  74. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  75. * SUCH DAMAGE.
  76. */
  77. #if defined(LIBC_SCCS) && !defined(lint)
  78. static char sccsid[] = "@(#)mcmd.c Based from: 8.3 (Berkeley) 3/26/94";
  79. #endif /* LIBC_SCCS and not lint */
  80. #if HAVE_CONFIG_H
  81. #include "config.h"
  82. #endif
  83. #include <sys/param.h>
  84. #include <sys/types.h>
  85. #include <sys/time.h>
  86. #include <sys/socket.h>
  87. #include <sys/stat.h>
  88. #ifdef HAVE_PTHREAD
  89. #include <pthread.h>
  90. #endif
  91. #include <netinet/in.h>
  92. #include <arpa/inet.h>
  93. #include <signal.h>
  94. #if HAVE_FCNTL_H
  95. #include <fcntl.h>
  96. #endif
  97. #include <netdb.h>
  98. #if HAVE_UNISTD_H
  99. #include <unistd.h>
  100. #endif
  101. #include <pwd.h>
  102. #include <errno.h>
  103. #include <ctype.h>
  104. #include <string.h>
  105. #include <stdio.h>
  106. #include <string.h>
  107. #include <stdlib.h>
  108. #include <elan3/elanvp.h>
  109. #include <munge.h>
  110. #include "src/qsnet/qswutil.h"
  111. #include "src/common/xmalloc.h" /* Free() */
  112. #include "src/common/macros.h" /* LINEBUFSIZE && IP_ADDR_LEN */
  113. #include "src/common/err.h"
  114. #include "src/common/fd.h"
  115. #include "src/common/xpoll.h"
  116. #include "src/pdsh/mod.h"
  117. #define MQSH_PORT 21234
  118. extern char **environ;
  119. static bool dist_set = false;
  120. static bool cyclic = false;
  121. static int nprocs = 1;
  122. static unsigned int railmask = 1;
  123. static bool railmask_set = false;
  124. static char cwd[MAXPATHLEN + 1];
  125. static qsw_info_t qinfo;
  126. static ELAN_CAPABILITY cap;
  /*
   * Signal-mask helpers used by mqcmd().  They expect two local sigset_t
   * variables named `blockme' and `oldset' to be in scope at the point of use.
   *
   *   SET_PTHREAD()      block the signals in `blockme', saving the old mask
   *   RESTORE_PTHREAD()  reinstate the mask saved by SET_PTHREAD()
   *   EXIT_PTHREAD()     restore the mask and return -1 from the caller
   *
   * EXIT_PTHREAD() is wrapped in do { } while (0) so that it expands to a
   * single statement and stays correct under an unbraced if/else.  Without
   * pthreads the first two expand to a no-op expression.
   */
  #ifdef HAVE_PTHREAD
  #define SET_PTHREAD()     pthread_sigmask(SIG_BLOCK, &blockme, &oldset)
  #define RESTORE_PTHREAD() pthread_sigmask(SIG_SETMASK, &oldset, NULL)
  #define EXIT_PTHREAD()    do { RESTORE_PTHREAD(); return -1; } while (0)
  #else
  #define SET_PTHREAD()     ((void) 0)
  #define RESTORE_PTHREAD() ((void) 0)
  #define EXIT_PTHREAD()    do { return -1; } while (0)
  #endif
  137. #if STATIC_MODULES
  138. # define pdsh_module_info mqcmd_module_info
  139. # define pdsh_module_priority mqcmd_module_priority
  140. #endif
  141. int pdsh_module_priority = DEFAULT_MODULE_PRIORITY;
  142. static int mqcmd_postop(opt_t *opt);
  143. static int mqcmd_opt_m(opt_t *, int, char *);
  144. static int mqcmd_opt_n(opt_t *, int, char *);
  145. static int mqcmd_opt_r(opt_t *, int, char *);
  146. static int mqcmd_init(opt_t *);
  147. static int mqcmd_signal(int, void *, int);
  148. static int mqcmd(char *, char *, char *, char *, char *, int, int *, void **);
  149. /* random num for all jobs in this group */
  150. static unsigned int randy = -1;
  151. /*
  152. * Export pdsh module operations structure
  153. */
  154. struct pdsh_module_operations mqcmd_module_ops = {
  155. (ModInitF) NULL,
  156. (ModExitF) NULL,
  157. (ModReadWcollF) NULL,
  158. (ModPostOpF) mqcmd_postop,
  159. };
  160. /*
  161. * Export rcmd module operations
  162. */
  163. struct pdsh_rcmd_operations mqcmd_rcmd_ops = {
  164. (RcmdInitF) mqcmd_init,
  165. (RcmdSigF) mqcmd_signal,
  166. (RcmdF) mqcmd,
  167. };
  168. /*
  169. * Export module options
  170. */
  171. struct pdsh_module_option mqcmd_module_options[] =
  172. { { 'm', "block|cyclic", "(mqshell) control assignment of procs to nodes",
  173. DSH, (optFunc) mqcmd_opt_m },
  174. { 'n', "n", "(mqshell) set number of tasks per node",
  175. DSH, (optFunc) mqcmd_opt_n },
  176. { 'r', "railmask", "(mqshell) set rail bitmask for job on multirail system",
  177. DSH, (optFunc) mqcmd_opt_r },
  178. PDSH_OPT_TABLE_END
  179. };
  180. /*
  181. * Mqcmd module info
  182. */
  183. struct pdsh_module pdsh_module_info = {
  184. "rcmd",
  185. "mqsh",
  186. "Jim Garlick <garlick1@llnl.gov>",
  187. "Run MPI jobs over QsNet with mrsh authentication",
  188. DSH,
  189. &mqcmd_module_ops,
  190. &mqcmd_rcmd_ops,
  191. &mqcmd_module_options[0],
  192. };
  193. static int
  194. mqcmd_opt_m(opt_t *pdsh_opts, int opt, char *arg)
  195. {
  196. if (strcmp(arg, "block") == 0)
  197. cyclic = false;
  198. else if (strcmp(arg, "cyclic") == 0)
  199. cyclic = true;
  200. else
  201. return -1;
  202. dist_set = true;
  203. return 0;
  204. }
  205. static int
  206. mqcmd_opt_n(opt_t *pdsh_opts, int opt, char *arg)
  207. {
  208. nprocs = atoi(arg);
  209. return 0;
  210. }
  211. static int
  212. mqcmd_opt_r(opt_t *pdsh_opts, int opt, char *arg)
  213. {
  214. char *p = NULL;
  215. long int val = strtol (arg, &p, 0);
  216. if (*p != '\0')
  217. errx ("%p: Invalid value for railmask: \"%s\"\n", arg);
  218. railmask = (unsigned int) val;
  219. railmask_set = true;
  220. return (0);
  221. }
  222. static int mqcmd_postop(opt_t *opt)
  223. {
  224. int errors = 0;
  225. if (strcmp(opt->rcmd_name, "mqsh") == 0) {
  226. if (opt->fanout != DFLT_FANOUT && opt->wcoll != NULL) {
  227. if (opt->fanout != hostlist_count(opt->wcoll)) {
  228. err("%p: mqcmd: fanout must = target node list length "
  229. "with -R mqsh\n");
  230. errors++;
  231. }
  232. }
  233. if (nprocs <= 0) {
  234. err("%p: -n should be > 0\n");
  235. errors++;
  236. }
  237. if ((railmask == 0) || (railmask > QSW_RAILMASK_MAX)) {
  238. err ("%p: mqcmd: invalid value %d for -r railmask\n", railmask);
  239. errors++;
  240. }
  241. } else {
  242. if (nprocs != 1) {
  243. err("%p: mqcmd: -n can only be specified with -R mqsh\n");
  244. errors++;
  245. }
  246. if (dist_set) {
  247. err("%p: mqcmd: -m may only be specified with -R mqsh\n");
  248. errors++;
  249. }
  250. if (railmask_set) {
  251. err("%p: mqcmd: -r may only be specified with -R mqsh\n");
  252. errors++;
  253. }
  254. }
  255. return errors;
  256. }
  257. static int
  258. _mqcmd_opt_init(opt_t *opt)
  259. {
  260. if (opt->fanout == DFLT_FANOUT && opt->wcoll != NULL)
  261. opt->fanout = hostlist_count(opt->wcoll);
  262. else {
  263. err("%p: mqcmd: Unable to set appropriate fanout\n");
  264. return -1;
  265. }
  266. opt->labels = false;
  267. opt->kill_on_fail = true;
  268. if (opt->dshpath != NULL)
  269. Free((void **) &opt->dshpath);
  270. return 0;
  271. }
  272. /*
  273. * Intialize elan capability and info structures that will be used when
  274. * running the job.
  275. * wcoll (IN) list of nodes
  276. */
  277. static int mqcmd_init(opt_t * opt)
  278. {
  279. int totprocs = nprocs * hostlist_count(opt->wcoll);
  280. int rv, rand_fd;
  281. /*
  282. * Drop privileges if running setuid root
  283. */
  284. if ((geteuid() == 0) && (getuid() != 0))
  285. setuid (getuid ());
  286. /*
  287. * Verify constraints for running Elan jobs
  288. * and initialize options.
  289. */
  290. if (_mqcmd_opt_init(opt) < 0)
  291. return -1;
  292. if (getcwd(cwd, sizeof(cwd)) == NULL) { /* cache working directory */
  293. err("%p: mqcmd: getcwd failed: %m\n");
  294. return -1;
  295. }
  296. if (qsw_init() < 0)
  297. exit(1);
  298. /* initialize Elan capability structure. */
  299. if (qsw_init_capability(&cap, totprocs, opt->wcoll, cyclic, railmask) < 0) {
  300. err("%p: mqcmd: failed to initialize Elan capability\n");
  301. return -1;
  302. }
  303. qsw_fini();
  304. /* initialize elan info structure */
  305. qinfo.prgnum = qsw_get_prgnum(); /* call after qsw_init_capability */
  306. qinfo.nnodes = hostlist_count(opt->wcoll);
  307. qinfo.nprocs = totprocs;
  308. qinfo.nodeid = qinfo.procid = qinfo.rank = 0;
  309. /*
  310. * Generate a random number to send in our package to the
  311. * server. We will see it again and compare it when the
  312. * server sets up the stderr socket and sends it to us.
  313. * We need to loop for the tiny possibility we read 0 :P
  314. */
  315. if ((rand_fd = open ("/dev/urandom", O_RDONLY | O_NONBLOCK)) < 0 ) {
  316. err("%p: mqcmd: Open of /dev/urandom failed\n");
  317. return -1;
  318. }
  319. do {
  320. if ((rv = read (rand_fd, &randy, sizeof(uint32_t))) < 0) {
  321. close(rand_fd);
  322. err("%p: mqcmd: Read of /dev/urandom failed\n");
  323. return -1;
  324. }
  325. if (rv < (int) (sizeof(uint32_t))) {
  326. close(rand_fd);
  327. err("%p: mqcmd: Read returned too few bytes\n");
  328. return -1;
  329. }
  330. } while (randy == 0);
  331. close(rand_fd);
  332. return 0;
  333. }
  /*
   * Forward a signal to the remote side by writing its number, as a
   * single byte, to the given descriptor.
   *   fd (IN)      descriptor to write to; nothing is done if negative
   *   arg (IN)     not used
   *   signum (IN)  signal number to send
   *   RETURN       always 0
   *
   * Delivery is best effort: the descriptor is switched to non-blocking
   * mode first (a failure to do so is only reported) and the result of
   * the write is not examined.
   */
  static int
  mqcmd_signal(int fd, void *arg, int signum)
  {
      char sigbyte = (char) signum;

      if (fd < 0)
          return 0;

      /* set non-blocking mode for write - just take our best shot */
      if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
          err("%p: fcntl: %m\n");

      write(fd, &sigbyte, 1);
      return 0;
  }
  347. /*
  348. * Send extra arguments to qshell server
  349. * s (IN) socket
  350. * nodeid (IN) node index for this connection
  351. */
  352. static int _mqcmd_send_extra_args(int s, int nodeid, const char *ahost)
  353. {
  354. char **ep;
  355. char tmpstr[1024];
  356. int count = 0;
  357. int i;
  358. /* send current working dir */
  359. if (fd_write_n(s, cwd, strlen(cwd) + 1) < 0) {
  360. err("%p: %S: error writing cwd: %m\n", ahost);
  361. return -1;
  362. }
  363. /* send environment (count followed by variables, each \0-term) */
  364. for (ep = environ; *ep != NULL; ep++)
  365. count++;
  366. snprintf(tmpstr, sizeof(tmpstr), "%d", count);
  367. if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
  368. err("%p: %S: error writing envcount: %m\n", ahost);
  369. return -1;
  370. }
  371. for (ep = environ; *ep != NULL; ep++) {
  372. if (fd_write_n(s, *ep, strlen(*ep) + 1) < 0) {
  373. err("%p: %S: error writing environemtn: %m\n", ahost);
  374. return -1;
  375. }
  376. }
  377. /* send elan capability */
  378. if (qsw_encode_cap(tmpstr, sizeof(tmpstr), &cap) < 0)
  379. return -1;
  380. if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
  381. err("%p: %S: error writing elan capability: %m\n", ahost);
  382. return -1;
  383. }
  384. for (i = 0; i < qsw_cap_bitmap_count(); i += 16) {
  385. if (qsw_encode_cap_bitmap(tmpstr, sizeof(tmpstr), &cap, i) < 0)
  386. return -1;
  387. if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
  388. err("%p: %S: error writing bitmap: %m\n", ahost);
  389. return -1;
  390. }
  391. }
  392. /* send elan info */
  393. qinfo.nodeid = qinfo.rank = qinfo.procid = nodeid;
  394. if (qsw_encode_info(tmpstr, sizeof(tmpstr), &qinfo) < 0)
  395. return -1;
  396. if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
  397. err("%p: %S: error writing qinfo: %m\n", ahost);
  398. return -1;
  399. }
  400. return 0;
  401. }
  402. /*
  403. * Derived from the rcmd() libc call, with modified interface.
  404. * This version is MT-safe. Errors are displayed in pdsh-compat format.
  405. * Connection can time out.
  406. * ahost (IN) target hostname
  407. * addr (IN) 4 byte internet address
  408. * locuser (IN) not used
  409. * remuser (IN) remote username
  410. * cmd (IN) remote command to execute under shell
  411. * int nodeid (IN) node index for this connection
  412. * fd2p (IN) if non NULL, return stderr file descriptor here
  413. * int (RETURN) -1 on error, socket for I/O on success
  414. *
  415. * Combination of code derived from mcmd by Mike Haskell, qcmd by
  416. * Jim Garlick, and a variety of minor modifications.
  417. */
  418. static int
  419. mqcmd(char *ahost, char *addr, char *locuser, char *remuser, char *cmd,
  420. int nodeid, int *fd2p, void **arg)
  421. {
  422. struct sockaddr m_socket;
  423. struct sockaddr_in *getp;
  424. struct sockaddr_in sin, from;
  425. struct sockaddr_storage ss;
  426. struct in_addr m_in;
  427. unsigned int rand, randl;
  428. unsigned char *hptr;
  429. int s, s2, rv, mcount, lport;
  430. char c;
  431. char num[6] = {0};
  432. char *mptr;
  433. char *mbuf;
  434. char *tmbuf;
  435. char *m;
  436. char *mpvers;
  437. char num_seq[12] = {0};
  438. socklen_t len;
  439. sigset_t blockme;
  440. sigset_t oldset;
  441. char haddrdot[16] = {0};
  442. munge_ctx_t ctx;
  443. struct xpollfd xpfds[2];
  444. sigemptyset(&blockme);
  445. sigaddset(&blockme, SIGURG);
  446. sigaddset(&blockme, SIGPIPE);
  447. SET_PTHREAD();
  448. if (( rv = strcmp(ahost,"localhost")) == 0 ) {
  449. errno = EACCES;
  450. err("%p: %S: mqcmd: Can't use localhost\n", ahost);
  451. EXIT_PTHREAD();
  452. }
  453. /* Convert randy to decimal string, 0 if we dont' want stderr */
  454. if (fd2p != NULL)
  455. snprintf(num_seq, sizeof(num_seq),"%d",randy);
  456. else
  457. snprintf(num_seq, sizeof(num_seq),"%d",0);
  458. /*
  459. * Start setup of the stdin/stdout socket...
  460. */
  461. lport = 0;
  462. len = sizeof(struct sockaddr_in);
  463. if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
  464. err("%p: %S: mqcmd: socket call stdout failed: %m\n", ahost);
  465. EXIT_PTHREAD();
  466. }
  467. memset (&ss, '\0', sizeof(ss));
  468. ss.ss_family = AF_INET;
  469. if (bind(s, (struct sockaddr *)&ss, len) < 0) {
  470. err("%p: %S: mqcmd: bind failed: %m\n", ahost);
  471. goto bad;
  472. }
  473. sin.sin_family = AF_INET;
  474. memcpy(&sin.sin_addr.s_addr, addr, IP_ADDR_LEN);
  475. sin.sin_port = htons(MQSH_PORT);
  476. if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
  477. err("%p: %S: mqcmd: connect failed: %m\n", ahost);
  478. goto bad;
  479. }
  480. lport = 0;
  481. s2 = -1;
  482. if (fd2p != NULL) {
  483. /*
  484. * Start the socket setup for the stderr.
  485. */
  486. struct sockaddr_in sin2;
  487. if ((s2 = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
  488. err("%p: %S: mqcmd: socket call for stderr failed: %m\n", ahost);
  489. goto bad;
  490. }
  491. memset (&sin2, 0, sizeof(sin2));
  492. sin2.sin_family = AF_INET;
  493. sin2.sin_addr.s_addr = htonl(INADDR_ANY);
  494. sin2.sin_port = 0;
  495. if (bind(s2, (struct sockaddr *)&sin2, sizeof(sin2)) < 0) {
  496. err("%p: %S: mqcmd: bind failed: %m\n", ahost);
  497. close(s2);
  498. goto bad;
  499. }
  500. len = sizeof(struct sockaddr);
  501. /*
  502. * Retrieve our port number so we can hand it to the server
  503. * for the return (stderr) connection...
  504. */
  505. /* getsockname is thread safe */
  506. if (getsockname(s2,&m_socket,&len) < 0) {
  507. err("%p: %S: mqcmd: getsockname failed: %m\n", ahost);
  508. close(s2);
  509. goto bad;
  510. }
  511. getp = (struct sockaddr_in *)&m_socket;
  512. lport = ntohs(getp->sin_port);
  513. if (listen(s2, 1) < 0) {
  514. err("%p: %S: mqcmd: listen() failed: %m\n", ahost);
  515. close(s2);
  516. goto bad;
  517. }
  518. }
  519. snprintf(num,sizeof(num),"%d",lport);
  520. memcpy(&m_in.s_addr, addr, IP_ADDR_LEN);
  521. /* inet_ntoa is not thread safe, so we use the following,
  522. * which is more or less ripped from glibc
  523. */
  524. hptr = (unsigned char *)&m_in;
  525. sprintf(haddrdot, "%u.%u.%u.%u", hptr[0], hptr[1], hptr[2], hptr[3]);
  526. /*
  527. * We call munge_encode which will take what we write in and return a
  528. * pointer to an munged buffer. What we get back is a null terminated
  529. * string of encrypted characters.
  530. *
  531. * The format of the unmunged buffer is as follows (each a string terminated
  532. * with a '\0' (null):
  533. *
  534. * stderr_port_number & /dev/urandom_client_produce_number are 0
  535. * if user did not request stderr socket
  536. *
  537. * SIZE EXAMPLE
  538. * ========== =============
  539. * remote_user_name variable "mhaskell"
  540. * '\0'
  541. * dotted_decimal_address_of_this_server 7-15 bytes "134.9.11.155"
  542. * '\0'
  543. * stderr_port_number 4-8 bytes "50111"
  544. * '\0'
  545. * /dev/urandom_client_produced_number 1-8 bytes "1f79ca0e"
  546. * '\0'
  547. * users_command variable "ls -al"
  548. * '\0' '\0'
  549. *
  550. * (The last extra null is accounted for in the following line's last strlen() call.)
  551. */
  552. mcount = ((strlen(remuser)+1) + (strlen(haddrdot)+1) + (strlen(num)+1) +
  553. (strlen(num_seq)+1) + strlen(cmd)+2);
  554. tmbuf = mbuf = malloc(mcount);
  555. if (tmbuf == NULL) {
  556. err("%p: %S: mqcmd: Error from malloc\n", ahost);
  557. close(s2);
  558. goto bad;
  559. }
  560. /*
  561. * The following memset() call takes the extra trailing null as part of its
  562. * count as well.
  563. */
  564. memset(mbuf,0,mcount);
  565. mptr = strcpy(mbuf, remuser);
  566. mptr += strlen(remuser)+1;
  567. mptr = strcpy(mptr, haddrdot);
  568. mptr += strlen(haddrdot)+1;
  569. mptr = strcpy(mptr, num);
  570. mptr += strlen(num)+1;
  571. mptr = strcpy(mptr, num_seq);
  572. mptr += strlen(num_seq)+1;
  573. mptr = strcpy(mptr, cmd);
  574. if ((ctx = munge_ctx_create()) == NULL)
  575. goto bad;
  576. if ((rv = munge_encode(&m,0,mbuf,mcount)) != EMUNGE_SUCCESS) {
  577. err("%p: %S: mqcmd: munge_encode: %S\n", ahost, munge_ctx_strerror(ctx));
  578. munge_ctx_destroy(ctx);
  579. close(s2);
  580. free(tmbuf);
  581. goto bad;
  582. }
  583. munge_ctx_destroy(ctx);
  584. mcount = (strlen(m)+1);
  585. /*
  586. * Write stderr port in the clear in case we can't decode for
  587. * some reason (i.e. bad credentials). May be 0 if user
  588. * doesn't want stderr
  589. */
  590. if (fd2p != NULL) {
  591. rv = fd_write_n(s, num, strlen(num)+1);
  592. if (rv != (strlen(num)+1)) {
  593. free(m);
  594. free(tmbuf);
  595. if (errno == EPIPE)
  596. err("%p: %S: mqcmd: Lost connection (EPIPE): %m\n", ahost);
  597. else
  598. err("%p: %S: mqcmd: Write of stderr port failed: %m\n", ahost);
  599. close(s2);
  600. goto bad;
  601. }
  602. } else {
  603. write(s, "", 1);
  604. lport = 0;
  605. }
  606. /*
  607. * Write the munge_encoded blob to the socket.
  608. */
  609. rv = fd_write_n(s, m, mcount);
  610. if (rv != mcount) {
  611. free(m);
  612. free(tmbuf);
  613. if (errno == EPIPE)
  614. err("%p: %S: mqcmd: Lost connection: %m\n", ahost);
  615. else
  616. err("%p: %S: mqcmd: Write to socket failed: %m\n", ahost);
  617. close(s2);
  618. goto bad;
  619. }
  620. free(m);
  621. free(tmbuf);
  622. if (fd2p != NULL) {
  623. /*
  624. * Wait for stderr connection from daemon.
  625. */
  626. int s3;
  627. errno = 0;
  628. xpfds[0].fd = s;
  629. xpfds[1].fd = s2;
  630. xpfds[0].events = xpfds[1].events = XPOLLREAD;
  631. if ( ((rv = xpoll(xpfds, 2, -1)) < 0)
  632. || rv != 1
  633. || (xpfds[0].revents > 0)) {
  634. if (errno != 0)
  635. err("%p: %S: mqcmd: xpoll (setting up stderr): %m\n", ahost);
  636. else
  637. err("%p: %S: mqcmd: xpoll: protocol failure in circuit setup\n", ahost);
  638. (void) close(s2);
  639. goto bad;
  640. }
  641. errno = 0;
  642. len = sizeof(from); /* arg to accept */
  643. if ((s3 = accept(s2, (struct sockaddr *)&from, &len)) < 0) {
  644. close(s2);
  645. err("%p: %S: mqcmd: accept (stderr) failed: %m\n", ahost);
  646. goto bad;
  647. }
  648. if (from.sin_family != AF_INET) {
  649. err("%p: %S: mqcmd: bad family type: %d\n", ahost, from.sin_family);
  650. goto bad2;
  651. }
  652. close(s2);
  653. /*
  654. * The following fixes a race condition between the daemon
  655. * and the client. The daemon is waiting for a null to
  656. * proceed. We do this to make sure that we have our
  657. * socket is up prior to the daemon running the command.
  658. */
  659. if (write(s,"",1) < 0) {
  660. err("%p: %S: mqcmd: Could not communicate to daemon to proceed: %m\n", ahost);
  661. close(s3);
  662. goto bad;
  663. }
  664. /*
  665. * Read from our stderr. The server should have placed our random number
  666. * we generated onto this socket.
  667. */
  668. rv = fd_read_n(s3, &rand, sizeof(rand));
  669. if (rv != (ssize_t) (sizeof(rand))) {
  670. err("%p: %S: mqcmd: Bad read of expected verification "
  671. "number off of stderr socket: %m\n", ahost);
  672. close(s3);
  673. goto bad;
  674. }
  675. randl = ntohl(rand);
  676. if (randl != randy) {
  677. char tmpbuf[LINEBUFSIZE] = {0};
  678. char *tptr = &tmpbuf[0];
  679. memcpy(tptr,(char *) &rand,sizeof(rand));
  680. tptr += sizeof(rand);
  681. rv = fd_read_line (s3, tptr, LINEBUFSIZE);
  682. if (rv < 0)
  683. err("%p: %S: mqcmd: Bad read of error from stderr: %m\n", ahost);
  684. else
  685. err("%p: %S: mqcmd: %s\n", ahost, &tmpbuf[0]);
  686. close(s3);
  687. goto bad;
  688. }
  689. /*
  690. * Set the stderr file descriptor for the user...
  691. */
  692. *fd2p = s3;
  693. }
  694. /* send extra information */
  695. if (_mqcmd_send_extra_args(s, nodeid, ahost) < 0) {
  696. err("%p: %S: mqcmd: error sending extra args\n", ahost);
  697. goto bad2;
  698. }
  699. if ((rv = read(s, &c, 1)) < 0) {
  700. err("%p: %S: mqcmd: read: protocol failure: %m\n", ahost);
  701. goto bad2;
  702. }
  703. if (rv != 1) {
  704. err("%p: %S: mqcmd: read: protocol failure: invalid response\n", ahost);
  705. goto bad2;
  706. }
  707. if (c != '\0') {
  708. /* retrieve error string from remote server */
  709. char tmpbuf[LINEBUFSIZE];
  710. if (fd_read_line (s, &tmpbuf[0], LINEBUFSIZE) < 0)
  711. err("%p: %S: mqcmd: Error from remote host\n", ahost);
  712. else
  713. err("%p: %S: %s\n", ahost, tmpbuf);
  714. goto bad2;
  715. }
  716. RESTORE_PTHREAD();
  717. return (s);
  718. bad2:
  719. if (lport)
  720. close(*fd2p);
  721. bad:
  722. close(s);
  723. EXIT_PTHREAD();
  724. }
  725. /*
  726. * vi:tabstop=4 shiftwidth=4 expandtab
  727. */