/src/modules/mqcmd.c
C | 848 lines | 518 code | 121 blank | 209 comment | 117 complexity | aec8f2b90f0f0a9a37aef28b53991dd3 MD5 | raw file
1/*****************************************************************************\ 2 * $Id$ 3 ***************************************************************************** 4 * Copyright (C) 2001-2006 The Regents of the University of California. 5 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 6 * Written by Jim Garlick <garlick@llnl.gov>. 7 * UCRL-CODE-2003-005. 8 * 9 * This file is part of Pdsh, a parallel remote shell program. 10 * For details, see <http://www.llnl.gov/linux/pdsh/>. 11 * 12 * Pdsh is free software; you can redistribute it and/or modify it under 13 * the terms of the GNU General Public License as published by the Free 14 * Software Foundation; either version 2 of the License, or (at your option) 15 * any later version. 16 * 17 * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY 18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 19 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 20 * details. 21 * 22 * You should have received a copy of the GNU General Public License along 23 * with Pdsh; if not, write to the Free Software Foundation, Inc., 24 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 25\*****************************************************************************/ 26 27/* 28 * Started with BSD rcmd.c which is: 29 * 30 * Copyright (c) 1983, 1993, 1994, 2003 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgement: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * 5. This is free software; you can redistribute it and/or modify it 53 * under the terms of the GNU General Public License as published 54 * by the Free Software Foundation; either version 2 of the 55 * License, or (at your option) any later version. 56 * 57 * 6. This is distributed in the hope that it will be useful, but 58 * WITHOUT ANY WARRANTY; without even the implied warranty of 59 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 60 * GNU General Public License for more details. 61 * 62 * 7. You should have received a copy of the GNU General Public License; 63 * if not, write to the Free Software Foundation, Inc., 59 Temple 64 * Place, Suite 330, Boston, MA 02111-1307 USA. 65 * 66 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 67 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 68 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 69 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 70 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 71 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 72 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 73 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 74 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 75 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 76 * SUCH DAMAGE. 77 */ 78 79#if defined(LIBC_SCCS) && !defined(lint) 80static char sccsid[] = "@(#)mcmd.c Based from: 8.3 (Berkeley) 3/26/94"; 81#endif /* LIBC_SCCS and not lint */ 82 83#if HAVE_CONFIG_H 84#include "config.h" 85#endif 86 87#include <sys/param.h> 88#include <sys/types.h> 89#include <sys/time.h> 90#include <sys/socket.h> 91#include <sys/stat.h> 92 93#ifdef HAVE_PTHREAD 94#include <pthread.h> 95#endif 96 97#include <netinet/in.h> 98#include <arpa/inet.h> 99#include <signal.h> 100#if HAVE_FCNTL_H 101#include <fcntl.h> 102#endif 103#include <netdb.h> 104#if HAVE_UNISTD_H 105#include <unistd.h> 106#endif 107#include <pwd.h> 108#include <errno.h> 109#include <ctype.h> 110#include <string.h> 111 112#include <stdio.h> 113#include <string.h> 114#include <stdlib.h> 115 116#include <elan3/elanvp.h> 117#include <munge.h> 118 119#include "src/qsnet/qswutil.h" 120#include "src/common/xmalloc.h" /* Free() */ 121#include "src/common/macros.h" /* LINEBUFSIZE && IP_ADDR_LEN */ 122#include "src/common/err.h" 123#include "src/common/fd.h" 124#include "src/common/xpoll.h" 125#include "src/pdsh/mod.h" 126 127#define MQSH_PORT 21234 128 129extern char **environ; 130 131static bool dist_set = false; 132static bool cyclic = false; 133static int nprocs = 1; 134 135static unsigned int railmask = 1; 136static bool railmask_set = false; 137 138static char cwd[MAXPATHLEN + 1]; 139static qsw_info_t qinfo; 140static ELAN_CAPABILITY cap; 141 142#ifdef HAVE_PTHREAD 143#define SET_PTHREAD() pthread_sigmask(SIG_BLOCK, &blockme, &oldset) 144#define RESTORE_PTHREAD() pthread_sigmask(SIG_SETMASK, &oldset, NULL) 145#define EXIT_PTHREAD() RESTORE_PTHREAD(); \ 146return -1 147#else 148#define SET_PTHREAD() 149#define RESTORE_PTHREAD() 150#define EXIT_PTHREAD() return -1 151#endif 152 153#if STATIC_MODULES 154# define pdsh_module_info mqcmd_module_info 155# define pdsh_module_priority mqcmd_module_priority 156#endif 157 158int pdsh_module_priority = DEFAULT_MODULE_PRIORITY; 159 160static int mqcmd_postop(opt_t *opt); 161 162static int mqcmd_opt_m(opt_t *, int, char *); 163static int mqcmd_opt_n(opt_t *, int, char *); 164static int mqcmd_opt_r(opt_t *, int, char *); 165 166static int mqcmd_init(opt_t *); 167static int mqcmd_signal(int, void *, int); 168static int mqcmd(char *, char *, char *, char *, char *, int, int *, void **); 169 170/* random num for all jobs in this group */ 171static unsigned int randy = -1; 172 173/* 174 * Export pdsh module operations structure 175 */ 176struct pdsh_module_operations mqcmd_module_ops = { 177 (ModInitF) NULL, 178 (ModExitF) NULL, 179 (ModReadWcollF) NULL, 180 (ModPostOpF) mqcmd_postop, 181}; 182 183/* 184 * Export rcmd module operations 185 */ 186struct pdsh_rcmd_operations mqcmd_rcmd_ops = { 187 (RcmdInitF) mqcmd_init, 188 (RcmdSigF) mqcmd_signal, 189 (RcmdF) mqcmd, 190}; 191 192/* 193 * Export module options 194 */ 195struct pdsh_module_option mqcmd_module_options[] = 196{ { 'm', "block|cyclic", "(mqshell) control assignment of procs to nodes", 197 DSH, (optFunc) mqcmd_opt_m }, 198 { 'n', "n", "(mqshell) set number of tasks per node", 199 DSH, (optFunc) mqcmd_opt_n }, 200 { 'r', "railmask", "(mqshell) set rail bitmask for job on multirail system", 201 DSH, (optFunc) mqcmd_opt_r }, 202 PDSH_OPT_TABLE_END 203}; 204 205/* 206 * Mqcmd module info 207 */ 208struct pdsh_module pdsh_module_info = { 209 "rcmd", 210 "mqsh", 211 "Jim Garlick <garlick1@llnl.gov>", 212 "Run MPI jobs over QsNet with mrsh authentication", 213 DSH, 214 215 &mqcmd_module_ops, 216 &mqcmd_rcmd_ops, 217 &mqcmd_module_options[0], 218}; 219 220static int 221mqcmd_opt_m(opt_t *pdsh_opts, int opt, char *arg) 222{ 223 if (strcmp(arg, "block") == 0) 224 cyclic = false; 225 else if (strcmp(arg, "cyclic") == 0) 226 cyclic = true; 227 else 228 return -1; 229 230 dist_set = true; 231 232 return 0; 233} 234 235static int 236mqcmd_opt_n(opt_t *pdsh_opts, int opt, char *arg) 237{ 238 nprocs = atoi(arg); 239 return 0; 240} 241 242static int 243mqcmd_opt_r(opt_t *pdsh_opts, int opt, char *arg) 244{ 245 char *p = NULL; 246 long int val = strtol (arg, &p, 0); 247 248 if (*p != '\0') 249 errx ("%p: Invalid value for railmask: \"%s\"\n", arg); 250 251 railmask = (unsigned int) val; 252 railmask_set = true; 253 return (0); 254} 255 256static int mqcmd_postop(opt_t *opt) 257{ 258 int errors = 0; 259 260 if (strcmp(opt->rcmd_name, "mqsh") == 0) { 261 if (opt->fanout != DFLT_FANOUT && opt->wcoll != NULL) { 262 if (opt->fanout != hostlist_count(opt->wcoll)) { 263 err("%p: mqcmd: fanout must = target node list length " 264 "with -R mqsh\n"); 265 errors++; 266 } 267 } 268 if (nprocs <= 0) { 269 err("%p: -n should be > 0\n"); 270 errors++; 271 } 272 if ((railmask == 0) || (railmask > QSW_RAILMASK_MAX)) { 273 err ("%p: mqcmd: invalid value %d for -r railmask\n", railmask); 274 errors++; 275 } 276 } else { 277 if (nprocs != 1) { 278 err("%p: mqcmd: -n can only be specified with -R mqsh\n"); 279 errors++; 280 } 281 282 if (dist_set) { 283 err("%p: mqcmd: -m may only be specified with -R mqsh\n"); 284 errors++; 285 } 286 287 if (railmask_set) { 288 err("%p: mqcmd: -r may only be specified with -R mqsh\n"); 289 errors++; 290 } 291 } 292 293 return errors; 294} 295 296static int 297_mqcmd_opt_init(opt_t *opt) 298{ 299 if (opt->fanout == DFLT_FANOUT && opt->wcoll != NULL) 300 opt->fanout = hostlist_count(opt->wcoll); 301 else { 302 err("%p: mqcmd: Unable to set appropriate fanout\n"); 303 return -1; 304 } 305 306 opt->labels = false; 307 opt->kill_on_fail = true; 308 309 if (opt->dshpath != NULL) 310 Free((void **) &opt->dshpath); 311 312 return 0; 313} 314 315/* 316 * Intialize elan capability and info structures that will be used when 317 * running the job. 318 * wcoll (IN) list of nodes 319 */ 320static int mqcmd_init(opt_t * opt) 321{ 322 int totprocs = nprocs * hostlist_count(opt->wcoll); 323 int rv, rand_fd; 324 325 /* 326 * Drop privileges if running setuid root 327 */ 328 if ((geteuid() == 0) && (getuid() != 0)) 329 setuid (getuid ()); 330 331 /* 332 * Verify constraints for running Elan jobs 333 * and initialize options. 334 */ 335 if (_mqcmd_opt_init(opt) < 0) 336 return -1; 337 338 if (getcwd(cwd, sizeof(cwd)) == NULL) { /* cache working directory */ 339 err("%p: mqcmd: getcwd failed: %m\n"); 340 return -1; 341 } 342 343 if (qsw_init() < 0) 344 exit(1); 345 346 /* initialize Elan capability structure. */ 347 if (qsw_init_capability(&cap, totprocs, opt->wcoll, cyclic, railmask) < 0) { 348 err("%p: mqcmd: failed to initialize Elan capability\n"); 349 return -1; 350 } 351 352 qsw_fini(); 353 354 /* initialize elan info structure */ 355 qinfo.prgnum = qsw_get_prgnum(); /* call after qsw_init_capability */ 356 qinfo.nnodes = hostlist_count(opt->wcoll); 357 qinfo.nprocs = totprocs; 358 qinfo.nodeid = qinfo.procid = qinfo.rank = 0; 359 360 /* 361 * Generate a random number to send in our package to the 362 * server. We will see it again and compare it when the 363 * server sets up the stderr socket and sends it to us. 364 * We need to loop for the tiny possibility we read 0 :P 365 */ 366 if ((rand_fd = open ("/dev/urandom", O_RDONLY | O_NONBLOCK)) < 0 ) { 367 err("%p: mqcmd: Open of /dev/urandom failed\n"); 368 return -1; 369 } 370 371 do { 372 if ((rv = read (rand_fd, &randy, sizeof(uint32_t))) < 0) { 373 close(rand_fd); 374 err("%p: mqcmd: Read of /dev/urandom failed\n"); 375 return -1; 376 } 377 378 if (rv < (int) (sizeof(uint32_t))) { 379 close(rand_fd); 380 err("%p: mqcmd: Read returned too few bytes\n"); 381 return -1; 382 } 383 } while (randy == 0); 384 385 close(rand_fd); 386 387 return 0; 388} 389 390static int 391mqcmd_signal(int fd, void *arg, int signum) 392{ 393 char c; 394 395 if (fd >= 0) { 396 /* set non-blocking mode for write - just take our best shot */ 397 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) 398 err("%p: fcntl: %m\n"); 399 c = (char) signum; 400 write(fd, &c, 1); 401 } 402 return 0; 403} 404 405/* 406 * Send extra arguments to qshell server 407 * s (IN) socket 408 * nodeid (IN) node index for this connection 409 */ 410static int _mqcmd_send_extra_args(int s, int nodeid, const char *ahost) 411{ 412 char **ep; 413 char tmpstr[1024]; 414 int count = 0; 415 int i; 416 417 /* send current working dir */ 418 if (fd_write_n(s, cwd, strlen(cwd) + 1) < 0) { 419 err("%p: %S: error writing cwd: %m\n", ahost); 420 return -1; 421 } 422 423 /* send environment (count followed by variables, each \0-term) */ 424 for (ep = environ; *ep != NULL; ep++) 425 count++; 426 427 snprintf(tmpstr, sizeof(tmpstr), "%d", count); 428 if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) { 429 err("%p: %S: error writing envcount: %m\n", ahost); 430 return -1; 431 } 432 433 for (ep = environ; *ep != NULL; ep++) { 434 if (fd_write_n(s, *ep, strlen(*ep) + 1) < 0) { 435 err("%p: %S: error writing environemtn: %m\n", ahost); 436 return -1; 437 } 438 } 439 440 /* send elan capability */ 441 if (qsw_encode_cap(tmpstr, sizeof(tmpstr), &cap) < 0) 442 return -1; 443 444 if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) { 445 err("%p: %S: error writing elan capability: %m\n", ahost); 446 return -1; 447 } 448 449 for (i = 0; i < qsw_cap_bitmap_count(); i += 16) { 450 if (qsw_encode_cap_bitmap(tmpstr, sizeof(tmpstr), &cap, i) < 0) 451 return -1; 452 453 if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) { 454 err("%p: %S: error writing bitmap: %m\n", ahost); 455 return -1; 456 } 457 } 458 459 /* send elan info */ 460 qinfo.nodeid = qinfo.rank = qinfo.procid = nodeid; 461 if (qsw_encode_info(tmpstr, sizeof(tmpstr), &qinfo) < 0) 462 return -1; 463 464 if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) { 465 err("%p: %S: error writing qinfo: %m\n", ahost); 466 return -1; 467 } 468 469 return 0; 470} 471 472/* 473 * Derived from the rcmd() libc call, with modified interface. 474 * This version is MT-safe. Errors are displayed in pdsh-compat format. 475 * Connection can time out. 476 * ahost (IN) target hostname 477 * addr (IN) 4 byte internet address 478 * locuser (IN) not used 479 * remuser (IN) remote username 480 * cmd (IN) remote command to execute under shell 481 * int nodeid (IN) node index for this connection 482 * fd2p (IN) if non NULL, return stderr file descriptor here 483 * int (RETURN) -1 on error, socket for I/O on success 484 * 485 * Combination of code derived from mcmd by Mike Haskell, qcmd by 486 * Jim Garlick, and a variety of minor modifications. 487 */ 488static int 489mqcmd(char *ahost, char *addr, char *locuser, char *remuser, char *cmd, 490 int nodeid, int *fd2p, void **arg) 491{ 492 struct sockaddr m_socket; 493 struct sockaddr_in *getp; 494 struct sockaddr_in sin, from; 495 struct sockaddr_storage ss; 496 struct in_addr m_in; 497 unsigned int rand, randl; 498 unsigned char *hptr; 499 int s, s2, rv, mcount, lport; 500 char c; 501 char num[6] = {0}; 502 char *mptr; 503 char *mbuf; 504 char *tmbuf; 505 char *m; 506 char *mpvers; 507 char num_seq[12] = {0}; 508 socklen_t len; 509 sigset_t blockme; 510 sigset_t oldset; 511 char haddrdot[16] = {0}; 512 munge_ctx_t ctx; 513 struct xpollfd xpfds[2]; 514 515 sigemptyset(&blockme); 516 sigaddset(&blockme, SIGURG); 517 sigaddset(&blockme, SIGPIPE); 518 SET_PTHREAD(); 519 520 if (( rv = strcmp(ahost,"localhost")) == 0 ) { 521 errno = EACCES; 522 err("%p: %S: mqcmd: Can't use localhost\n", ahost); 523 EXIT_PTHREAD(); 524 } 525 526 /* Convert randy to decimal string, 0 if we dont' want stderr */ 527 if (fd2p != NULL) 528 snprintf(num_seq, sizeof(num_seq),"%d",randy); 529 else 530 snprintf(num_seq, sizeof(num_seq),"%d",0); 531 532 /* 533 * Start setup of the stdin/stdout socket... 534 */ 535 lport = 0; 536 len = sizeof(struct sockaddr_in); 537 538 if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { 539 err("%p: %S: mqcmd: socket call stdout failed: %m\n", ahost); 540 EXIT_PTHREAD(); 541 } 542 543 memset (&ss, '\0', sizeof(ss)); 544 ss.ss_family = AF_INET; 545 546 if (bind(s, (struct sockaddr *)&ss, len) < 0) { 547 err("%p: %S: mqcmd: bind failed: %m\n", ahost); 548 goto bad; 549 } 550 551 sin.sin_family = AF_INET; 552 553 memcpy(&sin.sin_addr.s_addr, addr, IP_ADDR_LEN); 554 555 sin.sin_port = htons(MQSH_PORT); 556 if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) { 557 err("%p: %S: mqcmd: connect failed: %m\n", ahost); 558 goto bad; 559 } 560 561 lport = 0; 562 s2 = -1; 563 if (fd2p != NULL) { 564 /* 565 * Start the socket setup for the stderr. 566 */ 567 struct sockaddr_in sin2; 568 569 if ((s2 = socket(AF_INET, SOCK_STREAM, 0)) < 0) { 570 err("%p: %S: mqcmd: socket call for stderr failed: %m\n", ahost); 571 goto bad; 572 } 573 574 memset (&sin2, 0, sizeof(sin2)); 575 sin2.sin_family = AF_INET; 576 sin2.sin_addr.s_addr = htonl(INADDR_ANY); 577 sin2.sin_port = 0; 578 579 if (bind(s2, (struct sockaddr *)&sin2, sizeof(sin2)) < 0) { 580 err("%p: %S: mqcmd: bind failed: %m\n", ahost); 581 close(s2); 582 goto bad; 583 } 584 585 len = sizeof(struct sockaddr); 586 587 /* 588 * Retrieve our port number so we can hand it to the server 589 * for the return (stderr) connection... 590 */ 591 592 /* getsockname is thread safe */ 593 if (getsockname(s2,&m_socket,&len) < 0) { 594 err("%p: %S: mqcmd: getsockname failed: %m\n", ahost); 595 close(s2); 596 goto bad; 597 } 598 599 getp = (struct sockaddr_in *)&m_socket; 600 lport = ntohs(getp->sin_port); 601 602 if (listen(s2, 1) < 0) { 603 err("%p: %S: mqcmd: listen() failed: %m\n", ahost); 604 close(s2); 605 goto bad; 606 } 607 } 608 609 snprintf(num,sizeof(num),"%d",lport); 610 memcpy(&m_in.s_addr, addr, IP_ADDR_LEN); 611 612 /* inet_ntoa is not thread safe, so we use the following, 613 * which is more or less ripped from glibc 614 */ 615 hptr = (unsigned char *)&m_in; 616 sprintf(haddrdot, "%u.%u.%u.%u", hptr[0], hptr[1], hptr[2], hptr[3]); 617 618 /* 619 * We call munge_encode which will take what we write in and return a 620 * pointer to an munged buffer. What we get back is a null terminated 621 * string of encrypted characters. 622 * 623 * The format of the unmunged buffer is as follows (each a string terminated 624 * with a '\0' (null): 625 * 626 * stderr_port_number & /dev/urandom_client_produce_number are 0 627 * if user did not request stderr socket 628 * 629 * SIZE EXAMPLE 630 * ========== ============= 631 * remote_user_name variable "mhaskell" 632 * '\0' 633 * dotted_decimal_address_of_this_server 7-15 bytes "134.9.11.155" 634 * '\0' 635 * stderr_port_number 4-8 bytes "50111" 636 * '\0' 637 * /dev/urandom_client_produced_number 1-8 bytes "1f79ca0e" 638 * '\0' 639 * users_command variable "ls -al" 640 * '\0' '\0' 641 * 642 * (The last extra null is accounted for in the following line's last strlen() call.) 643 */ 644 645 646 mcount = ((strlen(remuser)+1) + (strlen(haddrdot)+1) + (strlen(num)+1) + 647 (strlen(num_seq)+1) + strlen(cmd)+2); 648 tmbuf = mbuf = malloc(mcount); 649 if (tmbuf == NULL) { 650 err("%p: %S: mqcmd: Error from malloc\n", ahost); 651 close(s2); 652 goto bad; 653 } 654 /* 655 * The following memset() call takes the extra trailing null as part of its 656 * count as well. 657 */ 658 memset(mbuf,0,mcount); 659 660 mptr = strcpy(mbuf, remuser); 661 mptr += strlen(remuser)+1; 662 mptr = strcpy(mptr, haddrdot); 663 mptr += strlen(haddrdot)+1; 664 mptr = strcpy(mptr, num); 665 mptr += strlen(num)+1; 666 mptr = strcpy(mptr, num_seq); 667 mptr += strlen(num_seq)+1; 668 mptr = strcpy(mptr, cmd); 669 670 if ((ctx = munge_ctx_create()) == NULL) 671 goto bad; 672 673 if ((rv = munge_encode(&m,0,mbuf,mcount)) != EMUNGE_SUCCESS) { 674 err("%p: %S: mqcmd: munge_encode: %S\n", ahost, munge_ctx_strerror(ctx)); 675 munge_ctx_destroy(ctx); 676 close(s2); 677 free(tmbuf); 678 goto bad; 679 } 680 681 munge_ctx_destroy(ctx); 682 683 mcount = (strlen(m)+1); 684 685 /* 686 * Write stderr port in the clear in case we can't decode for 687 * some reason (i.e. bad credentials). May be 0 if user 688 * doesn't want stderr 689 */ 690 if (fd2p != NULL) { 691 rv = fd_write_n(s, num, strlen(num)+1); 692 if (rv != (strlen(num)+1)) { 693 free(m); 694 free(tmbuf); 695 if (errno == EPIPE) 696 err("%p: %S: mqcmd: Lost connection (EPIPE): %m\n", ahost); 697 else 698 err("%p: %S: mqcmd: Write of stderr port failed: %m\n", ahost); 699 close(s2); 700 goto bad; 701 } 702 } else { 703 write(s, "", 1); 704 lport = 0; 705 } 706 707 /* 708 * Write the munge_encoded blob to the socket. 709 */ 710 rv = fd_write_n(s, m, mcount); 711 if (rv != mcount) { 712 free(m); 713 free(tmbuf); 714 if (errno == EPIPE) 715 err("%p: %S: mqcmd: Lost connection: %m\n", ahost); 716 else 717 err("%p: %S: mqcmd: Write to socket failed: %m\n", ahost); 718 close(s2); 719 goto bad; 720 } 721 722 free(m); 723 free(tmbuf); 724 725 if (fd2p != NULL) { 726 /* 727 * Wait for stderr connection from daemon. 728 */ 729 int s3; 730 731 errno = 0; 732 xpfds[0].fd = s; 733 xpfds[1].fd = s2; 734 xpfds[0].events = xpfds[1].events = XPOLLREAD; 735 if ( ((rv = xpoll(xpfds, 2, -1)) < 0) 736 || rv != 1 737 || (xpfds[0].revents > 0)) { 738 if (errno != 0) 739 err("%p: %S: mqcmd: xpoll (setting up stderr): %m\n", ahost); 740 else 741 err("%p: %S: mqcmd: xpoll: protocol failure in circuit setup\n", ahost); 742 (void) close(s2); 743 goto bad; 744 } 745 746 errno = 0; 747 len = sizeof(from); /* arg to accept */ 748 749 if ((s3 = accept(s2, (struct sockaddr *)&from, &len)) < 0) { 750 close(s2); 751 err("%p: %S: mqcmd: accept (stderr) failed: %m\n", ahost); 752 goto bad; 753 } 754 755 if (from.sin_family != AF_INET) { 756 err("%p: %S: mqcmd: bad family type: %d\n", ahost, from.sin_family); 757 goto bad2; 758 } 759 760 close(s2); 761 762 /* 763 * The following fixes a race condition between the daemon 764 * and the client. The daemon is waiting for a null to 765 * proceed. We do this to make sure that we have our 766 * socket is up prior to the daemon running the command. 767 */ 768 if (write(s,"",1) < 0) { 769 err("%p: %S: mqcmd: Could not communicate to daemon to proceed: %m\n", ahost); 770 close(s3); 771 goto bad; 772 } 773 774 /* 775 * Read from our stderr. The server should have placed our random number 776 * we generated onto this socket. 777 */ 778 rv = fd_read_n(s3, &rand, sizeof(rand)); 779 if (rv != (ssize_t) (sizeof(rand))) { 780 err("%p: %S: mqcmd: Bad read of expected verification " 781 "number off of stderr socket: %m\n", ahost); 782 close(s3); 783 goto bad; 784 } 785 786 randl = ntohl(rand); 787 if (randl != randy) { 788 char tmpbuf[LINEBUFSIZE] = {0}; 789 char *tptr = &tmpbuf[0]; 790 791 memcpy(tptr,(char *) &rand,sizeof(rand)); 792 tptr += sizeof(rand); 793 rv = fd_read_line (s3, tptr, LINEBUFSIZE); 794 if (rv < 0) 795 err("%p: %S: mqcmd: Bad read of error from stderr: %m\n", ahost); 796 else 797 err("%p: %S: mqcmd: %s\n", ahost, &tmpbuf[0]); 798 close(s3); 799 goto bad; 800 } 801 802 /* 803 * Set the stderr file descriptor for the user... 804 */ 805 *fd2p = s3; 806 } 807 808 /* send extra information */ 809 if (_mqcmd_send_extra_args(s, nodeid, ahost) < 0) { 810 err("%p: %S: mqcmd: error sending extra args\n", ahost); 811 goto bad2; 812 } 813 814 if ((rv = read(s, &c, 1)) < 0) { 815 err("%p: %S: mqcmd: read: protocol failure: %m\n", ahost); 816 goto bad2; 817 } 818 819 if (rv != 1) { 820 err("%p: %S: mqcmd: read: protocol failure: invalid response\n", ahost); 821 goto bad2; 822 } 823 824 if (c != '\0') { 825 /* retrieve error string from remote server */ 826 char tmpbuf[LINEBUFSIZE]; 827 828 if (fd_read_line (s, &tmpbuf[0], LINEBUFSIZE) < 0) 829 err("%p: %S: mqcmd: Error from remote host\n", ahost); 830 else 831 err("%p: %S: %s\n", ahost, tmpbuf); 832 goto bad2; 833 } 834 RESTORE_PTHREAD(); 835 836 return (s); 837 838bad2: 839 if (lport) 840 close(*fd2p); 841bad: 842 close(s); 843 EXIT_PTHREAD(); 844} 845 846/* 847 * vi:tabstop=4 shiftwidth=4 expandtab 848 */