PageRenderTime 74ms CodeModel.GetById 13ms app.highlight 55ms RepoModel.GetById 1ms app.codeStats 0ms

/src/modules/qcmd.c

https://code.google.com/
C | 545 lines | 364 code | 66 blank | 115 comment | 90 complexity | 6f693675132fec5db51e2b8de88db2d1 MD5 | raw file
  1/*****************************************************************************\
  2 *  $Id$
  3 *****************************************************************************
  4 *  Copyright (C) 2001-2006 The Regents of the University of California.
  5 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  6 *  Written by Jim Garlick <garlick@llnl.gov>.
  7 *  UCRL-CODE-2003-005.
  8 *  
  9 *  This file is part of Pdsh, a parallel remote shell program.
 10 *  For details, see <http://www.llnl.gov/linux/pdsh/>.
 11 *  
 12 *  Pdsh is free software; you can redistribute it and/or modify it under
 13 *  the terms of the GNU General Public License as published by the Free
 14 *  Software Foundation; either version 2 of the License, or (at your option)
 15 *  any later version.
 16 *  
 17 *  Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY
 18 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 19 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 20 *  details.
 21 *  
 22 *  You should have received a copy of the GNU General Public License along
 23 *  with Pdsh; if not, write to the Free Software Foundation, Inc.,
 24 *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 25\*****************************************************************************/
 26
 27/*
 28 * This code is based on the BSD rcmd.c with MT safety added, and the 
 29 * interface changed.  Original UC regents header included below.
 30 */
 31
 32/*
 33 * Copyright (c) 1983, 1993, 1994
 34 *	The Regents of the University of California.  All rights reserved.
 35 *
 36 * Redistribution and use in source and binary forms, with or without
 37 * modification, are permitted provided that the following conditions
 38 * are met:
 39 * 1. Redistributions of source code must retain the above copyright
 40 *    notice, this list of conditions and the following disclaimer.
 41 * 2. Redistributions in binary form must reproduce the above copyright
 42 *    notice, this list of conditions and the following disclaimer in the
 43 *    documentation and/or other materials provided with the distribution.
 44 * 3. All advertising materials mentioning features or use of this software
 45 *    must display the following acknowledgement:
 46 *	This product includes software developed by the University of
 47 *	California, Berkeley and its contributors.
 48 * 4. Neither the name of the University nor the names of its contributors
 49 *    may be used to endorse or promote products derived from this software
 50 *    without specific prior written permission.
 51 *
 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 62 * SUCH DAMAGE.
 63 */
 64
 65#if defined(LIBC_SCCS) && !defined(lint)
 66static char sccsid[] = "@(#)rcmd.c	8.3 (Berkeley) 3/26/94";
 67#endif                          /* LIBC_SCCS and not lint */
 68
 69#if     HAVE_CONFIG_H
 70#include "config.h"
 71#endif
 72
 73#include <sys/param.h>
 74#include <sys/types.h>
 75#include <sys/time.h>
 76#include <sys/socket.h>
 77#include <sys/stat.h>
 78#if	HAVE_PTHREAD_H
 79#include <pthread.h>
 80#endif
 81#include <netinet/in.h>
 82#include <arpa/inet.h>
 83#include <signal.h>
 84#if HAVE_FCNTL_H
 85#include <fcntl.h>
 86#endif
 87#include <netdb.h>
 88#if HAVE_UNISTD_H
 89#include <unistd.h>
 90#endif
 91#include <pwd.h>
 92#include <errno.h>
 93#include <ctype.h>
 94#include <string.h>
 95
 96#include <stdio.h>
 97#include <string.h>
 98#include <stdlib.h>
 99
100#include <elan3/elanvp.h>
101
102#include "src/common/xmalloc.h"
103#include "src/common/xstring.h"
104#include "src/common/list.h"
105#include "src/common/err.h"
106#include "src/common/macros.h"                /* LINEBUFSIZE */
107#include "src/common/xpoll.h"
108#include "src/pdsh/mod.h"
109#include "src/pdsh/privsep.h"
110#include "src/qsnet/qswutil.h"
111
/* TCP port that qcmd() connects to on the target host. */
#define QSHELL_PORT 523

/*
 * Buffer length, only defined when gethostbyname_r() is available.
 * NOTE(review): not referenced in the visible part of this file.
 */
#if HAVE_GETHOSTBYNAME_R
#define HBUF_LEN	1024
#endif

/*
 * For static builds, rename the generic module symbols to qcmd-specific
 * names (presumably so several modules can be linked into one binary
 * without symbol clashes -- confirm against the module loader).
 */
#if STATIC_MODULES
#  define pdsh_module_info qcmd_module_info
#  define pdsh_module_priority qcmd_module_priority
#endif    
122
/* Priority of this module: the default module priority is used. */
int pdsh_module_priority = DEFAULT_MODULE_PRIORITY;

/* Process environment; sent to the remote side by _qcmd_send_extra_args(). */
extern char **environ;

/* State recorded by the -m and -n option handlers. */
static bool dist_set   = false;     /* true once -m has been accepted */
static bool cyclic     = false;     /* -m cyclic (true) or -m block (false) */
static int  nprocs     = 1;         /* -n value; multiplied by node count in qcmd_init() */

/* State recorded by the -r option handler. */
static unsigned int railmask     = 1;
static bool         railmask_set = false;   /* true once -r has been accepted */

/* Per-job data filled in by qcmd_init() and sent on every connection. */
static char cwd[MAXPATHLEN + 1];    /* cached working directory */
static qsw_info_t qinfo;            /* Elan info structure */
static ELAN_CAPABILITY cap;         /* Elan capability */

/* Module post-option hook. */
static int qcmd_postop(opt_t *opt);

/* Option handlers for -m, -n and -r. */
static int qcmd_opt_m(opt_t *, int, char *);
static int qcmd_opt_n(opt_t *, int, char *);
static int qcmd_opt_r(opt_t *, int, char *);

/* rcmd operations exported through qcmd_rcmd_ops. */
static int qcmd_init(opt_t *);
static int qcmd_signal(int, void *, int);
static int qcmd(char *, char *, char *, char *, char *, int, int *, void **); 
147
/*
 *  Export generic pdsh module operations.
 *  Only the post-option hook (qcmd_postop) is provided; the entries cast
 *  as ModInitF, ModExitF and ModReadWcollF are left NULL.
 */
struct pdsh_module_operations qcmd_module_ops = {
    (ModInitF)       NULL,
    (ModExitF)       NULL,
    (ModReadWcollF)  NULL,
    (ModPostOpF)     qcmd_postop
};
157
/*
 *  Export rcmd module operations:
 *    qcmd_init    one-time setup of the Elan capability and info,
 *    qcmd_signal  signal propagation over the stderr backchannel,
 *    qcmd         per-host connection setup.
 */
struct pdsh_rcmd_operations qcmd_rcmd_ops = {
    (RcmdInitF)  qcmd_init,
    (RcmdSigF)   qcmd_signal,
    (RcmdF)      qcmd,
};
166
/* 
 * Export module options.
 * Each entry gives the option character, its argument description, the
 * help text and the handler function; the table is closed by
 * PDSH_OPT_TABLE_END.
 */
struct pdsh_module_option qcmd_module_options[] =
 { { 'm', "block|cyclic", "(qshell) control assignment of procs to nodes",
     DSH, (optFunc) qcmd_opt_m },
   { 'n', "n",            "(qshell) set number of tasks per node",
     DSH, (optFunc) qcmd_opt_n },
  { 'r', "railmask",      "(qshell) set railmask for job on multirail system",
     DSH, (optFunc) qcmd_opt_r }, 
   PDSH_OPT_TABLE_END
 };
179
/* 
 * Qcmd module info.
 * Ties together the operation tables and the option table defined above.
 * The string "qsh" is the same name qcmd_postop() compares
 * opt->rcmd_name against.
 */
struct pdsh_module pdsh_module_info = {
  "rcmd",
  "qsh",
  "Jim Garlick <garlick@llnl.gov>",
  "Run MPI jobs over QsNet",
  DSH, 

  &qcmd_module_ops,
  &qcmd_rcmd_ops,
  &qcmd_module_options[0],
};
194
195static int
196qcmd_opt_m(opt_t *pdsh_opts, int opt, char *arg)
197{
198    if (strcmp(arg, "block") == 0)
199        cyclic = false;
200    else if (strcmp(arg, "cyclic") == 0)
201        cyclic = true;
202    else 
203        return -1;
204
205    dist_set = true;
206
207    return 0;
208}
209
210static int 
211qcmd_opt_n(opt_t *pdsh_opts, int opt, char *arg)
212{
213    nprocs = atoi(arg);
214    return 0;
215}
216
217static int
218qcmd_opt_r(opt_t *pdsh_opts, int opt, char *arg)
219{
220    char *p = NULL;
221    long int val = strtol (arg, &p, 0);
222
223    if (*p != '\0')
224        errx ("%p: Invalid value for railmask: \"%s\"\n", arg);
225
226    railmask = (unsigned int) val;
227    railmask_set = true;
228    return (0);
229}
230
231
/*
 * Propagate a signal to the remote side over the rcmd backchannel.
 *      efd (IN)        backchannel socket descriptor (negative if not used)
 *      arg (IN)        unused
 *      signum (IN)     signal number to send (sent as a single byte)
 *      int (RETURN)    always 0
 */
static int qcmd_signal(int efd, void *arg, int signum)
{
    char sigbyte;

    if (efd < 0)
        return 0;

    /* best effort only: use non-blocking mode for the write */
    if (fcntl(efd, F_SETFL, O_NONBLOCK) < 0)
        err("%p: fcntl: %m\n");

    sigbyte = (char) signum;
    write(efd, &sigbyte, 1);

    return 0;
}
251
252
253static int qcmd_postop(opt_t *opt)
254{
255    int errors = 0;
256
257    if (strcmp(opt->rcmd_name, "qsh") == 0) {
258        if (opt->fanout != DFLT_FANOUT && opt->wcoll != NULL) {
259            if  (opt->fanout != hostlist_count(opt->wcoll)) {
260                err("%p: fanout must = target node list length \"-R qsh\"\n");
261                errors++;
262            }
263        }
264        if (nprocs <= 0) {
265            err("%p: -n should be > 0\n");
266            errors++;
267        }
268        if ((railmask == 0) || (railmask > QSW_RAILMASK_MAX)) {
269            err ("%p: qcmd: invalid value %d for -r (railmask)\n", railmask);
270            errors++;
271        }
272    } else {
273        if (nprocs != 1) {
274            err("%p: -n can only be specified with \"-R qsh\"\n"); 
275            errors++;
276        }
277
278        if (dist_set) {
279            err("%p: -m may only be specified with \"-R qsh\"\n");
280            errors++;
281        }
282
283        if (railmask_set) {
284            err("%p: qcmd: -r may only be specified with -R mqsh\n");
285            errors++;
286        }
287    }
288
289    return errors;
290}
291
292
293static void
294_qcmd_opt_init(opt_t *opt)
295{
296    if (opt->fanout == DFLT_FANOUT && opt->wcoll != NULL)
297        opt->fanout = hostlist_count(opt->wcoll);
298    else {
299        err("%p: qcmd: Unable to set appropriate fanout\n");
300        exit(1);
301    }
302
303    opt->labels       = false;
304    opt->kill_on_fail = true;
305
306    if (opt->dshpath != NULL)
307        Free((void **) &opt->dshpath);
308}
309
310
311/* 
312 * Intialize elan capability and info structures that will be used when
313 * running the job.
314 * 	wcoll (IN)	list of nodes
315 */
316static int qcmd_init(opt_t * opt)
317{
318    int totprocs = nprocs * hostlist_count(opt->wcoll);
319
320    if (qsw_init() < 0)
321        exit(1);
322
323    /*
324     *  Verify constraints for running Elan jobs
325     *    and initialize options.
326     */
327    _qcmd_opt_init(opt);
328
329    if (getcwd(cwd, sizeof(cwd)) == NULL)       /* cache working directory */
330        errx("%p: getcwd failed: %m\n");
331
332    /* initialize Elan capability structure. */
333    if (qsw_init_capability(&cap, totprocs, opt->wcoll, cyclic, railmask) < 0)
334        errx("%p: failed to initialize Elan capability\n");
335
336    /* initialize elan info structure */
337    qinfo.prgnum = qsw_get_prgnum();    /* call after qsw_init_capability */
338    qinfo.nnodes = hostlist_count(opt->wcoll);
339    qinfo.nprocs = totprocs;
340    qinfo.nodeid = qinfo.procid = qinfo.rank = 0;
341
342    qsw_fini();
343    
344    return 0;
345}
346
347/*
348 * Send extra arguments to qshell server
349 *	s (IN)		socket 
350 *	nodeid (IN)	node index for this connection
351 */
352static int _qcmd_send_extra_args(int s, int nodeid)
353{
354    char **ep;
355    char tmpstr[1024];
356    int count = 0;
357    int i;
358
359    /* send current working dir */
360    (void) write(s, cwd, strlen(cwd) + 1);
361
362    /* send environment (count followed by variables, each \0-term) */
363    for (ep = environ; *ep != NULL; ep++)
364        count++;
365    snprintf(tmpstr, sizeof(tmpstr), "%d", count);
366    (void) write(s, tmpstr, strlen(tmpstr) + 1);
367    for (ep = environ; *ep != NULL; ep++)
368        (void) write(s, *ep, strlen(*ep) + 1);
369
370    /* send elan capability */
371    if (qsw_encode_cap(tmpstr, sizeof(tmpstr), &cap) < 0)
372        return -1;
373    (void) write(s, tmpstr, strlen(tmpstr) + 1);
374    for (i = 0; i < qsw_cap_bitmap_count(); i += 16) {
375        if (qsw_encode_cap_bitmap(tmpstr, sizeof(tmpstr), &cap, i) < 0)
376            return -1;
377        (void) write(s, tmpstr, strlen(tmpstr) + 1);
378    }
379
380    /* send elan info */
381    qinfo.nodeid = qinfo.rank = qinfo.procid = nodeid;
382    if (qsw_encode_info(tmpstr, sizeof(tmpstr), &qinfo) < 0)
383        return -1;
384    (void) write(s, tmpstr, strlen(tmpstr) + 1);
385
386    return 0;
387}
388
389/*
390 * Derived from the rcmd() libc call, with modified interface.
391 * This version is MT-safe.  Errors are displayed in pdsh-compat format.
392 * Connection can time out.
393 *	ahost (IN)		target hostname
394 *	locuser (IN)		local username
395 *	remuser (IN)		remote username
396 *	cmd (IN)		remote command to execute under shell
397 *	nodeid (IN)		node index for this connection
398 *	fd2p (IN)		if non NULL, return stderr file descriptor here
399 *	int (RETURN)		-1 on error, socket for I/O on success
400 */
401static int
402qcmd(char *ahost, char *addr, char *locuser, char *remuser, char *cmd,
403     int nodeid, int *fd2p, void **arg)
404{
405    struct sockaddr_in sin, from;
406    sigset_t oldset, blockme;
407    pid_t pid;
408    int s, lport, timo, rv;
409    char c;
410    struct xpollfd xpfds[2];
411
412    pid = getpid();
413    sigemptyset(&blockme);
414    sigaddset(&blockme, SIGURG);
415    pthread_sigmask(SIG_BLOCK, &blockme, &oldset);
416    for (timo = 1, lport = IPPORT_RESERVED - 1;;) {
417        s = privsep_rresvport(&lport);
418        if (s < 0) {
419            if (errno == EAGAIN)
420                err("%p: %S: qcmd: socket: all ports in use\n", ahost);
421            else
422                err("%p: %S: qcmd: socket: %m\n", ahost);
423            pthread_sigmask(SIG_SETMASK, &oldset, NULL);
424            return (-1);
425        }
426        fcntl(s, F_SETOWN, pid);
427        sin.sin_family = AF_INET;
428        memcpy(&sin.sin_addr, addr, IP_ADDR_LEN);
429        sin.sin_port = htons(QSHELL_PORT);
430        rv = connect(s, (struct sockaddr *) &sin, sizeof(sin));
431        if (rv >= 0)
432            break;
433        (void) close(s);
434        if (errno == EADDRINUSE) {
435            lport--;
436            continue;
437        }
438        if (errno == ECONNREFUSED && timo <= 16) {
439            (void) sleep(timo);
440            timo *= 2;
441            continue;
442        }
443        if (errno == EINTR)
444            err("%p: %S: connect: timed out\n", ahost);
445        else
446            err("%p: %S: connect: %m\n", ahost);
447        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
448        return (-1);
449    }
450    lport--;
451    if (fd2p == 0) {
452        write(s, "", 1);
453        lport = 0;
454    } else {
455        char num[8];
456        int s2 = privsep_rresvport(&lport), s3;
457        socklen_t len = sizeof(from);   /* arg to accept */
458
459        if (s2 < 0)
460            goto bad;
461        listen(s2, 1);
462        (void) snprintf(num, sizeof(num), "%d", lport);
463        if (write(s, num, strlen(num) + 1) != strlen(num) + 1) {
464            err("%p: %S: qcmd: write (setting up stderr): %m\n", ahost);
465            (void) close(s2);
466            goto bad;
467        }
468        errno = 0;
469        xpfds[0].fd = s;
470        xpfds[1].fd = s2;
471        xpfds[0].events = xpfds[1].events = XPOLLREAD;
472        if (((rv = xpoll(xpfds, 2, -1)) < 0) || rv != 1 || (xpfds[0].revents > 0)) {
473          if (errno != 0)
474            err("%p: %S: qcmd: xpoll (setting up stderr): %m\n", ahost);
475          else
476            err("%p: %S: qcmd: xpoll: protocol failure in circuit setup\n", ahost);
477          (void) close(s2);
478          goto bad;
479        }
480        s3 = accept(s2, (struct sockaddr *) &from, &len);
481        (void) close(s2);
482        if (s3 < 0) {
483            err("%p: %S: qcmd: accept: %m\n", ahost);
484            lport = 0;
485            goto bad;
486        }
487        *fd2p = s3;
488        from.sin_port = ntohs((u_short) from.sin_port);
489        if (from.sin_family != AF_INET ||
490            from.sin_port >= IPPORT_RESERVED ||
491            from.sin_port < IPPORT_RESERVED / 2) {
492            err("%p: %S: socket: protocol failure in circuit setup\n",
493                ahost);
494            goto bad2;
495        }
496    }
497    (void) write(s, locuser, strlen(locuser) + 1);
498    (void) write(s, remuser, strlen(remuser) + 1);
499    (void) write(s, cmd, strlen(cmd) + 1);
500    if (_qcmd_send_extra_args(s, nodeid) < 0)
501        goto bad2;
502
503    rv = read(s, &c, 1);
504    if (rv < 0) {
505        if (errno == EINTR)
506            err("%p: %S: read: protocol failure: %s\n",
507                ahost, "timed out");
508        else
509            err("%p: %S: read: protocol failure: %m\n", ahost);
510        goto bad2;
511    } else if (rv != 1) {
512        err("%p: %S: read: protocol failure: %s\n",
513            ahost, "invalid response");
514        goto bad2;
515    }
516    if (c != 0) {
517        /* retrieve error string from remote server */
518        char tmpbuf[LINEBUFSIZE];
519        char *p = tmpbuf;
520
521        while (read(s, &c, 1) == 1) {
522            *p++ = c;
523            if (c == '\n')
524                break;
525        }
526        if (c != '\n')
527            *p++ = '\n';
528        *p++ = '\0';
529        err("%S: %s", ahost, tmpbuf);
530        goto bad2;
531    }
532    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
533    return (s);
534  bad2:
535    if (lport)
536        (void) close(*fd2p);
537  bad:
538    (void) close(s);
539    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
540    return (-1);
541}
542
543/*
544 * vi:tabstop=4 shiftwidth=4 expandtab
545 */