PageRenderTime 60ms CodeModel.GetById 17ms app.highlight 36ms RepoModel.GetById 1ms app.codeStats 1ms

/src/modules/mqcmd.c

https://code.google.com/
C | 848 lines | 518 code | 121 blank | 209 comment | 117 complexity | aec8f2b90f0f0a9a37aef28b53991dd3 MD5 | raw file
  1/*****************************************************************************\
  2 *  $Id$
  3 *****************************************************************************
  4 *  Copyright (C) 2001-2006 The Regents of the University of California.
  5 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  6 *  Written by Jim Garlick <garlick@llnl.gov>.
  7 *  UCRL-CODE-2003-005.
  8 *  
  9 *  This file is part of Pdsh, a parallel remote shell program.
 10 *  For details, see <http://www.llnl.gov/linux/pdsh/>.
 11 *  
 12 *  Pdsh is free software; you can redistribute it and/or modify it under
 13 *  the terms of the GNU General Public License as published by the Free
 14 *  Software Foundation; either version 2 of the License, or (at your option)
 15 *  any later version.
 16 *  
 17 *  Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY
 18 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 19 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 20 *  details.
 21 *  
 22 *  You should have received a copy of the GNU General Public License along
 23 *  with Pdsh; if not, write to the Free Software Foundation, Inc.,
 24 *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 25\*****************************************************************************/
 26
 27/*
 28 * Started with BSD rcmd.c which is:
 29 * 
 30 * Copyright (c) 1983, 1993, 1994, 2003
 31 *      The Regents of the University of California.  All rights reserved.
 32 *
 33 * Redistribution and use in source and binary forms, with or without
 34 * modification, are permitted provided that the following conditions
 35 * are met:
 36 * 1. Redistributions of source code must retain the above copyright
 37 *    notice, this list of conditions and the following disclaimer.
 38 *
 39 * 2. Redistributions in binary form must reproduce the above copyright
 40 *    notice, this list of conditions and the following disclaimer in the
 41 *    documentation and/or other materials provided with the distribution.
 42 *
 43 * 3. All advertising materials mentioning features or use of this software
 44 *    must display the following acknowledgement:
 45 *      This product includes software developed by the University of
 46 *      California, Berkeley and its contributors.
 47 *
 48 * 4. Neither the name of the University nor the names of its contributors
 49 *    may be used to endorse or promote products derived from this software
 50 *    without specific prior written permission.
 51 *
 52 * 5. This is free software; you can redistribute it and/or modify it
 53 *    under the terms of the GNU General Public License as published
 54 *    by the Free Software Foundation; either version 2 of the
 55 *    License, or (at your option) any later version.
 56 *                              
 57 * 6. This is distributed in the hope that it will be useful, but
 58 *    WITHOUT ANY WARRANTY; without even the implied warranty of
 59 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 60 *    GNU General Public License for more details.
 61 *                                                           
 62 * 7. You should have received a copy of the GNU General Public License;
 63 *    if not, write to the Free Software Foundation, Inc., 59 Temple
 64 *    Place, Suite 330, Boston, MA  02111-1307  USA.
 65 *
 66 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 67 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 68 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 69 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 70 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 71 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 72 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 73 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 74 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 75 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 76 * SUCH DAMAGE.
 77 */
 78
 79#if defined(LIBC_SCCS) && !defined(lint)
 80static char sccsid[] = "@(#)mcmd.c      Based from: 8.3 (Berkeley) 3/26/94";
 81#endif /* LIBC_SCCS and not lint */
 82
 83#if     HAVE_CONFIG_H
 84#include "config.h"
 85#endif
 86
 87#include <sys/param.h>
 88#include <sys/types.h>
 89#include <sys/time.h>
 90#include <sys/socket.h>
 91#include <sys/stat.h>
 92
 93#ifdef HAVE_PTHREAD
 94#include <pthread.h>
 95#endif
 96
 97#include <netinet/in.h>
 98#include <arpa/inet.h>
 99#include <signal.h>
100#if HAVE_FCNTL_H
101#include <fcntl.h>
102#endif
103#include <netdb.h>
104#if HAVE_UNISTD_H
105#include <unistd.h>
106#endif
107#include <pwd.h>
108#include <errno.h>
109#include <ctype.h>
110#include <string.h>
111
112#include <stdio.h>
113#include <string.h>
114#include <stdlib.h>
115
116#include <elan3/elanvp.h>
117#include <munge.h>
118
119#include "src/qsnet/qswutil.h"
120#include "src/common/xmalloc.h"      /* Free() */
121#include "src/common/macros.h"       /* LINEBUFSIZE && IP_ADDR_LEN */
122#include "src/common/err.h"
123#include "src/common/fd.h"
124#include "src/common/xpoll.h"
125#include "src/pdsh/mod.h"
126
127#define MQSH_PORT       21234
128
129extern char **environ;
130
131static bool dist_set = false;
132static bool cyclic   = false;
133static int  nprocs   = 1;
134
135static unsigned int  railmask      = 1;
136static bool          railmask_set  = false;
137
138static char cwd[MAXPATHLEN + 1];
139static qsw_info_t qinfo;
140static ELAN_CAPABILITY cap;
141
/*
 * Signal mask helpers used by mqcmd().  They expect local variables
 * `blockme' and `oldset' (both sigset_t) to be in scope.
 *
 *  SET_PTHREAD()      block the signals in `blockme', saving the old mask
 *  RESTORE_PTHREAD()  restore the mask saved by SET_PTHREAD()
 *  EXIT_PTHREAD()     restore the mask and return -1 from the caller
 *
 * EXIT_PTHREAD() is wrapped in do { } while (0) so that it behaves as a
 * single statement, e.g. as the body of an unbraced if/else.
 */
#ifdef HAVE_PTHREAD
#define SET_PTHREAD()           pthread_sigmask(SIG_BLOCK, &blockme, &oldset)
#define RESTORE_PTHREAD()       pthread_sigmask(SIG_SETMASK, &oldset, NULL)
#define EXIT_PTHREAD()          do { RESTORE_PTHREAD(); return -1; } while (0)
#else
#define SET_PTHREAD()           ((void) 0)
#define RESTORE_PTHREAD()       ((void) 0)
#define EXIT_PTHREAD()          do { return -1; } while (0)
#endif
152
153#if STATIC_MODULES
154#  define pdsh_module_info mqcmd_module_info
155#  define pdsh_module_priority mqcmd_module_priority
156#endif    
157
158int pdsh_module_priority = DEFAULT_MODULE_PRIORITY;
159
160static int mqcmd_postop(opt_t *opt);
161
162static int mqcmd_opt_m(opt_t *, int, char *);
163static int mqcmd_opt_n(opt_t *, int, char *);
164static int mqcmd_opt_r(opt_t *, int, char *);
165
166static int mqcmd_init(opt_t *);
167static int mqcmd_signal(int, void *, int);
168static int mqcmd(char *, char *, char *, char *, char *, int, int *, void **); 
169
170/* random num for all jobs in this group */
171static unsigned int randy = -1;
172
173/* 
174 * Export pdsh module operations structure
175 */
176struct pdsh_module_operations mqcmd_module_ops = {
177    (ModInitF)       NULL, 
178    (ModExitF)       NULL, 
179    (ModReadWcollF)  NULL, 
180    (ModPostOpF)     mqcmd_postop,
181};
182
183/*
184 *  Export rcmd module operations
185 */
186struct pdsh_rcmd_operations mqcmd_rcmd_ops = {
187    (RcmdInitF)  mqcmd_init,
188    (RcmdSigF)   mqcmd_signal,
189    (RcmdF)      mqcmd,
190};
191
192/* 
193 * Export module options
194 */
195struct pdsh_module_option mqcmd_module_options[] =
196{ { 'm', "block|cyclic", "(mqshell) control assignment of procs to nodes",
197    DSH, (optFunc) mqcmd_opt_m },
198  { 'n', "n",            "(mqshell) set number of tasks per node",
199    DSH, (optFunc) mqcmd_opt_n },
200  { 'r', "railmask",     "(mqshell) set rail bitmask for job on multirail system",
201    DSH, (optFunc) mqcmd_opt_r }, 
202  PDSH_OPT_TABLE_END
203};
204
205/* 
206 * Mqcmd module info 
207 */
208struct pdsh_module pdsh_module_info = {
209    "rcmd",
210    "mqsh",
211    "Jim Garlick <garlick1@llnl.gov>",
212    "Run MPI jobs over QsNet with mrsh authentication",
213    DSH, 
214
215    &mqcmd_module_ops,
216    &mqcmd_rcmd_ops,
217    &mqcmd_module_options[0],
218};
219
220static int
221mqcmd_opt_m(opt_t *pdsh_opts, int opt, char *arg)
222{
223    if (strcmp(arg, "block") == 0)
224        cyclic = false;
225    else if (strcmp(arg, "cyclic") == 0)
226        cyclic = true;
227    else
228        return -1;
229
230    dist_set = true;
231
232    return 0;
233}
234
235static int
236mqcmd_opt_n(opt_t *pdsh_opts, int opt, char *arg)
237{
238    nprocs = atoi(arg);
239    return 0;
240}
241
242static int
243mqcmd_opt_r(opt_t *pdsh_opts, int opt, char *arg)
244{
245    char *p = NULL;
246    long int val = strtol (arg, &p, 0);
247
248    if (*p != '\0')
249        errx ("%p: Invalid value for railmask: \"%s\"\n", arg);
250
251    railmask = (unsigned int) val;
252    railmask_set = true;
253    return (0);
254}
255
256static int mqcmd_postop(opt_t *opt)
257{
258    int errors = 0;
259
260    if (strcmp(opt->rcmd_name, "mqsh") == 0) {
261        if (opt->fanout != DFLT_FANOUT && opt->wcoll != NULL) {
262            if  (opt->fanout != hostlist_count(opt->wcoll)) {
263                err("%p: mqcmd: fanout must = target node list length " 
264                    "with -R mqsh\n");
265                errors++;
266            }
267        }
268        if (nprocs <= 0) {
269            err("%p: -n should be > 0\n");
270            errors++;
271        }
272        if ((railmask == 0) || (railmask > QSW_RAILMASK_MAX)) {
273            err ("%p: mqcmd: invalid value %d for -r railmask\n", railmask);
274            errors++;
275        }
276    } else {
277        if (nprocs != 1) {
278            err("%p: mqcmd: -n can only be specified with -R mqsh\n");
279            errors++;
280        }
281
282        if (dist_set) {
283            err("%p: mqcmd: -m may only be specified with -R mqsh\n");
284            errors++;
285        }
286
287        if (railmask_set) {
288            err("%p: mqcmd: -r may only be specified with -R mqsh\n");
289            errors++;
290        }
291    }
292
293    return errors;
294}
295
296static int
297_mqcmd_opt_init(opt_t *opt)
298{
299    if (opt->fanout == DFLT_FANOUT && opt->wcoll != NULL)
300        opt->fanout = hostlist_count(opt->wcoll);
301    else {
302        err("%p: mqcmd: Unable to set appropriate fanout\n");
303        return -1;
304    }
305
306    opt->labels       = false;
307    opt->kill_on_fail = true;
308
309    if (opt->dshpath != NULL)
310        Free((void **) &opt->dshpath);
311
312    return 0;
313}
314
315/*
316 * Intialize elan capability and info structures that will be used when
317 * running the job.
318 *  wcoll (IN)  list of nodes
319 */
320static int mqcmd_init(opt_t * opt)
321{
322    int totprocs = nprocs * hostlist_count(opt->wcoll);
323    int rv, rand_fd;
324
325    /*
326     * Drop privileges if running setuid root
327     */
328    if ((geteuid() == 0) && (getuid() != 0))
329        setuid (getuid ());
330
331    /*
332     *  Verify constraints for running Elan jobs
333     *    and initialize options.
334     */
335    if (_mqcmd_opt_init(opt) < 0)
336        return -1;
337
338    if (getcwd(cwd, sizeof(cwd)) == NULL) {      /* cache working directory */
339        err("%p: mqcmd: getcwd failed: %m\n");
340        return -1;
341    }
342
343    if (qsw_init() < 0)
344        exit(1);
345
346    /* initialize Elan capability structure. */
347    if (qsw_init_capability(&cap, totprocs, opt->wcoll, cyclic, railmask) < 0) {
348        err("%p: mqcmd: failed to initialize Elan capability\n");
349        return -1;
350    }
351
352    qsw_fini();
353
354    /* initialize elan info structure */
355    qinfo.prgnum = qsw_get_prgnum();    /* call after qsw_init_capability */
356    qinfo.nnodes = hostlist_count(opt->wcoll);
357    qinfo.nprocs = totprocs;
358    qinfo.nodeid = qinfo.procid = qinfo.rank = 0;
359
360    /*
361     * Generate a random number to send in our package to the 
362     * server.  We will see it again and compare it when the
363     * server sets up the stderr socket and sends it to us.
364     * We need to loop for the tiny possibility we read 0 :P
365     */
366    if ((rand_fd = open ("/dev/urandom", O_RDONLY | O_NONBLOCK)) < 0 ) {
367        err("%p: mqcmd: Open of /dev/urandom failed\n");
368        return -1;
369    }
370
371    do {
372        if ((rv = read (rand_fd, &randy, sizeof(uint32_t))) < 0) {
373            close(rand_fd);
374            err("%p: mqcmd: Read of /dev/urandom failed\n");
375            return -1;
376        }
377
378        if (rv < (int) (sizeof(uint32_t))) {
379            close(rand_fd);
380            err("%p: mqcmd: Read returned too few bytes\n");
381            return -1;
382        }
383    } while (randy == 0);
384
385    close(rand_fd);
386
387    return 0;
388}
389
/*
 * Forward a signal to the remote side by writing the signal number,
 * as a single byte, on the given descriptor.
 *  fd (IN)         descriptor to write to; nothing is done if negative
 *  arg (IN)        not used
 *  signum (IN)     signal number to send
 *  int (RETURN)    always 0
 */
static int
mqcmd_signal(int fd, void *arg, int signum)
{
    char sigbyte = (char) signum;

    if (fd < 0)
        return 0;

    /* set non-blocking mode for write - just take our best shot */
    if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
        err("%p: fcntl: %m\n");

    write(fd, &sigbyte, 1);
    return 0;
}
404
405/*
406 * Send extra arguments to qshell server
407 *  s (IN)      socket
408 *  nodeid (IN) node index for this connection
409 */
410static int _mqcmd_send_extra_args(int s, int nodeid, const char *ahost)
411{
412    char **ep;
413    char tmpstr[1024];
414    int count = 0;
415    int i;
416
417    /* send current working dir */
418    if (fd_write_n(s, cwd, strlen(cwd) + 1) < 0) {
419        err("%p: %S: error writing cwd: %m\n", ahost);
420        return -1;
421    }
422
423    /* send environment (count followed by variables, each \0-term) */
424    for (ep = environ; *ep != NULL; ep++)
425        count++;
426
427    snprintf(tmpstr, sizeof(tmpstr), "%d", count);
428    if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
429        err("%p: %S: error writing envcount: %m\n", ahost);
430        return -1;
431    }
432
433    for (ep = environ; *ep != NULL; ep++) {
434        if (fd_write_n(s, *ep, strlen(*ep) + 1) < 0) {
435            err("%p: %S: error writing environemtn: %m\n", ahost);
436            return -1;
437        }
438    }
439
440    /* send elan capability */
441    if (qsw_encode_cap(tmpstr, sizeof(tmpstr), &cap) < 0)
442        return -1;
443
444    if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
445        err("%p: %S: error writing elan capability: %m\n", ahost);
446        return -1;
447    }
448
449    for (i = 0; i < qsw_cap_bitmap_count(); i += 16) {
450        if (qsw_encode_cap_bitmap(tmpstr, sizeof(tmpstr), &cap, i) < 0)
451            return -1;
452
453        if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
454            err("%p: %S: error writing bitmap: %m\n", ahost);
455            return -1;
456        }
457    }
458
459    /* send elan info */
460    qinfo.nodeid = qinfo.rank = qinfo.procid = nodeid;
461    if (qsw_encode_info(tmpstr, sizeof(tmpstr), &qinfo) < 0)
462        return -1;
463
464    if (fd_write_n(s, tmpstr, strlen(tmpstr) + 1) < 0) {
465        err("%p: %S: error writing qinfo: %m\n", ahost);
466        return -1;
467    }
468
469    return 0;
470}
471
472/*
473 * Derived from the rcmd() libc call, with modified interface.
474 * This version is MT-safe.  Errors are displayed in pdsh-compat format.
475 * Connection can time out.
476 *      ahost (IN)              target hostname
477 *      addr (IN)               4 byte internet address
478 *      locuser (IN)            not used 
479 *      remuser (IN)            remote username
480 *      cmd (IN)                remote command to execute under shell
481 *      int nodeid (IN)         node index for this connection
482 *      fd2p (IN)               if non NULL, return stderr file descriptor here
483 *      int (RETURN)            -1 on error, socket for I/O on success
484 *
485 * Combination of code derived from mcmd by Mike Haskell, qcmd by
486 * Jim Garlick, and a variety of minor modifications.
487 */
488static int 
489mqcmd(char *ahost, char *addr, char *locuser, char *remuser, char *cmd, 
490        int nodeid, int *fd2p, void **arg)
491{
492    struct sockaddr m_socket;
493    struct sockaddr_in *getp;
494    struct sockaddr_in sin, from;
495    struct sockaddr_storage ss;
496    struct in_addr m_in;
497    unsigned int rand, randl;
498    unsigned char *hptr;
499    int s, s2, rv, mcount, lport;
500    char c;
501    char num[6] = {0};
502    char *mptr;
503    char *mbuf;
504    char *tmbuf;
505    char *m;
506    char *mpvers;
507    char num_seq[12] = {0};
508    socklen_t len;
509    sigset_t blockme;
510    sigset_t oldset;
511    char haddrdot[16] = {0};
512    munge_ctx_t ctx;
513    struct xpollfd xpfds[2];
514
515    sigemptyset(&blockme);
516    sigaddset(&blockme, SIGURG);
517    sigaddset(&blockme, SIGPIPE);
518    SET_PTHREAD();
519
520    if (( rv = strcmp(ahost,"localhost")) == 0 ) {
521        errno = EACCES;
522        err("%p: %S: mqcmd: Can't use localhost\n", ahost);
523        EXIT_PTHREAD();
524    }
525
526    /* Convert randy to decimal string, 0 if we dont' want stderr */
527    if (fd2p != NULL)
528        snprintf(num_seq, sizeof(num_seq),"%d",randy);
529    else
530        snprintf(num_seq, sizeof(num_seq),"%d",0);
531
532    /*
533     * Start setup of the stdin/stdout socket...
534     */
535    lport = 0;
536    len = sizeof(struct sockaddr_in);
537
538    if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
539        err("%p: %S: mqcmd: socket call stdout failed: %m\n", ahost);
540        EXIT_PTHREAD();
541    }
542
543    memset (&ss, '\0', sizeof(ss));
544    ss.ss_family = AF_INET;
545
546    if (bind(s, (struct sockaddr *)&ss, len) < 0) {
547        err("%p: %S: mqcmd: bind failed: %m\n", ahost);
548        goto bad;
549    }
550
551    sin.sin_family = AF_INET;
552
553    memcpy(&sin.sin_addr.s_addr, addr, IP_ADDR_LEN); 
554
555    sin.sin_port = htons(MQSH_PORT);
556    if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
557        err("%p: %S: mqcmd: connect failed: %m\n", ahost);
558        goto bad;
559    }
560
561    lport = 0;
562    s2 = -1;
563    if (fd2p != NULL) {
564        /*
565         * Start the socket setup for the stderr.
566         */
567        struct sockaddr_in sin2;
568
569        if ((s2 = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
570            err("%p: %S: mqcmd: socket call for stderr failed: %m\n", ahost);
571            goto bad;
572        }
573
574        memset (&sin2, 0, sizeof(sin2));
575        sin2.sin_family = AF_INET;
576        sin2.sin_addr.s_addr = htonl(INADDR_ANY);
577        sin2.sin_port = 0;
578
579        if (bind(s2, (struct sockaddr *)&sin2, sizeof(sin2)) < 0) {
580            err("%p: %S: mqcmd: bind failed: %m\n", ahost);
581            close(s2);
582            goto bad;
583        }
584
585        len = sizeof(struct sockaddr);
586
587        /*
588         * Retrieve our port number so we can hand it to the server
589         * for the return (stderr) connection...
590         */
591
592        /* getsockname is thread safe */
593        if (getsockname(s2,&m_socket,&len) < 0) {
594            err("%p: %S: mqcmd: getsockname failed: %m\n", ahost);
595            close(s2);
596            goto bad;
597        }
598
599        getp = (struct sockaddr_in *)&m_socket;
600        lport = ntohs(getp->sin_port);
601
602        if (listen(s2, 1) < 0) {
603            err("%p: %S: mqcmd: listen() failed: %m\n", ahost);
604            close(s2);
605            goto bad;
606        }
607    }
608
609    snprintf(num,sizeof(num),"%d",lport);
610    memcpy(&m_in.s_addr, addr, IP_ADDR_LEN);
611
612    /* inet_ntoa is not thread safe, so we use the following,
613     * which is more or less ripped from glibc
614     */
615    hptr = (unsigned char *)&m_in;
616    sprintf(haddrdot, "%u.%u.%u.%u", hptr[0], hptr[1], hptr[2], hptr[3]);
617
618    /*
619     * We call munge_encode which will take what we write in and return a
620     * pointer to an munged buffer.  What we get back is a null terminated
621     * string of encrypted characters.
622     * 
623     * The format of the unmunged buffer is as follows (each a string terminated 
624     * with a '\0' (null):
625     *
626     * stderr_port_number & /dev/urandom_client_produce_number are 0
627     * if user did not request stderr socket
628     *
629     *                                              SIZE            EXAMPLE
630     *                                              ==========      =============
631     * remote_user_name                             variable        "mhaskell"
632     * '\0'
633     * dotted_decimal_address_of_this_server        7-15 bytes      "134.9.11.155"
634     * '\0'
635     * stderr_port_number                           4-8 bytes       "50111"
636     * '\0'
637     * /dev/urandom_client_produced_number          1-8 bytes       "1f79ca0e"
638     * '\0'
639     * users_command                                variable        "ls -al"
640     * '\0' '\0'
641     *
642     * (The last extra null is accounted for in the following line's last strlen() call.)
643     */
644
645
646    mcount = ((strlen(remuser)+1) + (strlen(haddrdot)+1) + (strlen(num)+1) + 
647            (strlen(num_seq)+1) + strlen(cmd)+2);
648    tmbuf = mbuf = malloc(mcount);
649    if (tmbuf == NULL) {
650        err("%p: %S: mqcmd: Error from malloc\n", ahost);
651        close(s2);
652        goto bad;
653    }
654    /*
655     * The following memset() call takes the extra trailing null as part of its
656     * count as well.
657     */
658    memset(mbuf,0,mcount);
659
660    mptr = strcpy(mbuf, remuser);
661    mptr += strlen(remuser)+1;
662    mptr = strcpy(mptr, haddrdot);
663    mptr += strlen(haddrdot)+1;
664    mptr = strcpy(mptr, num);
665    mptr += strlen(num)+1;
666    mptr = strcpy(mptr, num_seq);
667    mptr += strlen(num_seq)+1;
668    mptr = strcpy(mptr, cmd);
669
670    if ((ctx = munge_ctx_create()) == NULL)
671        goto bad;
672
673    if ((rv = munge_encode(&m,0,mbuf,mcount)) != EMUNGE_SUCCESS) {
674        err("%p: %S: mqcmd: munge_encode: %S\n", ahost, munge_ctx_strerror(ctx));
675        munge_ctx_destroy(ctx);
676        close(s2);
677        free(tmbuf);
678        goto bad;
679    }
680
681    munge_ctx_destroy(ctx);
682 
683    mcount = (strlen(m)+1);
684
685    /*
686     * Write stderr port in the clear in case we can't decode for
687     * some reason (i.e. bad credentials).  May be 0 if user
688     * doesn't want stderr
689     */
690    if (fd2p != NULL) {
691        rv = fd_write_n(s, num, strlen(num)+1);
692        if (rv != (strlen(num)+1)) {
693            free(m);
694            free(tmbuf);
695            if (errno == EPIPE)
696                err("%p: %S: mqcmd: Lost connection (EPIPE): %m\n", ahost);
697            else
698                err("%p: %S: mqcmd: Write of stderr port failed: %m\n", ahost);
699            close(s2);
700            goto bad;
701        }
702    } else {
703        write(s, "", 1);
704        lport = 0;
705    }
706
707    /*
708     * Write the munge_encoded blob to the socket.
709     */
710    rv = fd_write_n(s, m, mcount);
711    if (rv != mcount) {
712        free(m);
713        free(tmbuf);
714        if (errno == EPIPE)
715            err("%p: %S: mqcmd: Lost connection: %m\n", ahost);
716        else
717            err("%p: %S: mqcmd: Write to socket failed: %m\n", ahost);
718        close(s2);
719        goto bad;
720    }
721
722    free(m);
723    free(tmbuf);
724
725    if (fd2p != NULL) {
726        /*
727         * Wait for stderr connection from daemon.
728         */
729        int s3;
730      
731        errno = 0;
732        xpfds[0].fd = s;
733        xpfds[1].fd = s2;
734        xpfds[0].events = xpfds[1].events = XPOLLREAD;
735        if (  ((rv = xpoll(xpfds, 2, -1)) < 0) 
736            || rv != 1 
737            || (xpfds[0].revents > 0)) {
738            if (errno != 0)
739                err("%p: %S: mqcmd: xpoll (setting up stderr): %m\n", ahost);
740            else
741                err("%p: %S: mqcmd: xpoll: protocol failure in circuit setup\n", ahost);
742            (void) close(s2);
743            goto bad;
744        }
745
746        errno = 0;
747        len = sizeof(from); /* arg to accept */
748
749        if ((s3 = accept(s2, (struct sockaddr *)&from, &len)) < 0) {
750            close(s2);
751            err("%p: %S: mqcmd: accept (stderr) failed: %m\n", ahost);
752            goto bad;
753        }
754
755        if (from.sin_family != AF_INET) {
756            err("%p: %S: mqcmd: bad family type: %d\n", ahost, from.sin_family);
757            goto bad2;
758        }
759
760        close(s2);
761
762        /*
763         * The following fixes a race condition between the daemon
764         * and the client.  The daemon is waiting for a null to
765         * proceed.  We do this to make sure that we have our
766         * socket is up prior to the daemon running the command.
767         */
768        if (write(s,"",1) < 0) {
769            err("%p: %S: mqcmd: Could not communicate to daemon to proceed: %m\n", ahost);
770            close(s3);
771            goto bad;
772        }
773
774        /*
775         * Read from our stderr.  The server should have placed our random number
776         * we generated onto this socket.
777         */
778        rv = fd_read_n(s3, &rand, sizeof(rand));
779        if (rv != (ssize_t) (sizeof(rand))) {
780            err("%p: %S: mqcmd: Bad read of expected verification "
781                    "number off of stderr socket: %m\n", ahost);
782            close(s3);
783            goto bad;
784        }
785
786        randl = ntohl(rand);
787        if (randl != randy) {
788            char tmpbuf[LINEBUFSIZE] = {0};
789            char *tptr = &tmpbuf[0];
790
791            memcpy(tptr,(char *) &rand,sizeof(rand));
792            tptr += sizeof(rand);
793            rv = fd_read_line (s3, tptr, LINEBUFSIZE);
794            if (rv < 0)
795                err("%p: %S: mqcmd: Bad read of error from stderr: %m\n", ahost);
796            else
797                err("%p: %S: mqcmd: %s\n", ahost, &tmpbuf[0]);
798            close(s3);
799            goto bad;
800        }
801
802        /*
803         * Set the stderr file descriptor for the user...
804         */
805        *fd2p = s3;
806    }
807
808    /* send extra information */
809    if (_mqcmd_send_extra_args(s, nodeid, ahost) < 0) {
810        err("%p: %S: mqcmd: error sending extra args\n", ahost);
811        goto bad2;
812    }
813
814    if ((rv = read(s, &c, 1)) < 0) {
815        err("%p: %S: mqcmd: read: protocol failure: %m\n", ahost);
816        goto bad2;
817    }
818
819    if (rv != 1) {
820        err("%p: %S: mqcmd: read: protocol failure: invalid response\n", ahost);
821        goto bad2;
822    }
823
824    if (c != '\0') {
825        /* retrieve error string from remote server */
826        char tmpbuf[LINEBUFSIZE];
827
828        if (fd_read_line (s, &tmpbuf[0], LINEBUFSIZE) < 0)
829            err("%p: %S: mqcmd: Error from remote host\n", ahost);
830        else
831            err("%p: %S: %s\n", ahost, tmpbuf);
832        goto bad2;
833    }
834    RESTORE_PTHREAD();
835
836    return (s);
837
838bad2:
839    if (lport)
840        close(*fd2p);
841bad:
842    close(s);
843    EXIT_PTHREAD();
844}
845
846/*
847 * vi:tabstop=4 shiftwidth=4 expandtab
848 */