PageRenderTime 63ms CodeModel.GetById 35ms app.highlight 24ms RepoModel.GetById 1ms app.codeStats 0ms

/src/modules/sdr.c

https://code.google.com/
C | 555 lines | 365 code | 111 blank | 79 comment | 82 complexity | ab9f72e0815c7521e1b5cbdf893d0a3c MD5 | raw file
  1/*****************************************************************************\
  2 *  $Id$
  3 *****************************************************************************
  4 *  Copyright (C) 2001-2006 The Regents of the University of California.
  5 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  6 *  Written by Jim Garlick <garlick@llnl.gov>.
  7 *  UCRL-CODE-2003-005.
  8 *  
  9 *  This file is part of Pdsh, a parallel remote shell program.
 10 *  For details, see <http://www.llnl.gov/linux/pdsh/>.
 11 *  
 12 *  Pdsh is free software; you can redistribute it and/or modify it under
 13 *  the terms of the GNU General Public License as published by the Free
 14 *  Software Foundation; either version 2 of the License, or (at your option)
 15 *  any later version.
 16 *  
 17 *  Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY
 18 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 19 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 20 *  details.
 21 *  
 22 *  You should have received a copy of the GNU General Public License along
 23 *  with Pdsh; if not, write to the Free Software Foundation, Inc.,
 24 *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 25\*****************************************************************************/
 26#if HAVE_CONFIG_H
 27#  include "config.h"
 28#endif
 29
 30#include <assert.h>
 31#include <string.h>
 32
 33#include "src/pdsh/wcoll.h"
 34#include "src/pdsh/mod.h"
 35#include "src/pdsh/xpopen.h"
 36#include "src/common/xmalloc.h"
 37#include "src/common/err.h"
 38#include "src/common/xstring.h"
 39
 40#define SPACES "\t\n "
 41
 42#define LINEBUFSIZE 2048
 43/* some handy SP constants */
 44/* NOTE: degenerate case of one node per frame, nodes would be 1, 17, 33,... */
 45#define MAX_SP_NODES        512
 46#define MAX_SP_NODES_PER_FRAME  16
 47#define MAX_SP_NODE_NUMBER (MAX_SP_NODES * MAX_SP_NODES_PER_FRAME - 1)
 48
 49#if STATIC_MODULES
 50#  define pdsh_module_info sdr_module_info
 51#  define pdsh_module_priority sdr_module_priority
 52#endif    
 53
 54int pdsh_module_priority = DEFAULT_MODULE_PRIORITY;
 55
 56static int sdr_init (void);
 57static int sdr_exit (void);
 58static hostlist_t read_sdr (opt_t *opt);
 59static int sdr_postop (opt_t *);
 60
 61static int sdr_process_opt(opt_t *, int, char *);
 62
 63/*
 64 *  Export generic module functions
 65 */
 66struct pdsh_module_operations sdr_module_ops = {
 67    (ModInitF)       sdr_init, 
 68    (ModExitF)       sdr_exit, 
 69    (ModReadWcollF)  read_sdr,
 70    (ModPostOpF)     sdr_postop,
 71};
 72
 73/* 
 74 * Export module options
 75 */
 76struct pdsh_module_option sdr_module_options[] = 
 77 { { 'a', NULL, "target all nodes", 
 78     DSH | PCP, (optFunc) sdr_process_opt
 79   },
 80   { 'v', NULL, "verify nodes are up using host/switch_responds",
 81     DSH | PCP, (optFunc) sdr_process_opt
 82   },
 83   { 'i', NULL, "translate to alternate/initial hostnames from SDR (if applicable)",
 84     DSH | PCP, (optFunc) sdr_process_opt
 85   },
 86   { 'G', NULL, "with -a, run on all SP partitions",
 87     DSH | PCP, (optFunc) sdr_process_opt
 88   },
 89   PDSH_OPT_TABLE_END
 90 };
 91
 92/* 
 93 * Sdr module info 
 94 */
 95struct pdsh_module pdsh_module_info = {
 96  "misc",
 97  "sdr",
 98  "Jim Garlick <garlick@llnl.gov>",
 99  "Support for SDR on IBM SP",
100  DSH | PCP, 
101
102  &sdr_module_ops,
103  NULL,
104  &sdr_module_options[0],
105};
106
107
108
/*
 *  One cached SDR record, stored in sdrcache[] at the node's number.
 *  XXX: Hash by node number instead of leaving room for
 *       all possible nodes
 */
struct sdr_info {
    char *hostname;             /* initial_hostname, cut at the first '.'  */
    char *reliable_hostname;    /* reliable_hostname, cut at the first '.' */
    bool  switch_responds;      /* filled in by _sdr_getresp()             */
    bool  host_responds;        /* filled in by _sdr_getresp()             */
};
120
121static bool sdr_initialized = false;
122static struct sdr_info * sdrcache[MAX_SP_NODE_NUMBER];
123
124/* 
125 *  Global options
126 */
127static bool allnodes = false;
128static bool altnames = false;
129static bool verify   = false;
130static bool global   = false;
131
132
133/*
134 *  Required static forward declarations
135 */
136static struct sdr_info * sdr_info_create (char *host, char *rhost);
137static void sdr_info_destroy (struct sdr_info *s);
138
139static hostlist_t _sdr_filter (hostlist_t hl, bool iopt, bool verify);
140static hostlist_t _sdr_wcoll(bool Gopt);
141static struct sdr_info * _find_node (const char *name, int *rhost);
142static hostlist_t _sdr_reliable_names (void);
143static void _sdr_getnames(bool Gopt);
144static void _sdr_getresp (bool Gopt);
145
146static char *_list_nth(List l, int n);
147
148/*
149 * module interface functions
150 */
151
152static int sdr_init (void)
153{
154	int i;
155	for (i = 0; i < MAX_SP_NODE_NUMBER; i++)
156		sdrcache[i] = NULL;
157	return (0);
158}
159
160static int sdr_exit (void)
161{
162    int i;
163    for (i = 0; i < MAX_SP_NODE_NUMBER; i++)
164        sdr_info_destroy (sdrcache[i]);
165    return (0);
166}
167
168static int sdr_process_opt(opt_t *pdsh_opt, int opt, char *arg)
169{
170    switch (opt) {
171     case 'a': 
172        allnodes = true; 
173        break;
174     case 'i': 
175        altnames = true; 
176        break;
177     case 'v': 
178        verify = true;   
179        break;
180     case 'G': 
181        global = true;
182        break;
183     default:  
184        errx("%p: badness factor high in sdr module\n");
185        break;
186    }
187    return 0;
188}
189
190
191static hostlist_t read_sdr(opt_t *opt)
192{
193	if (!allnodes)
194		return (NULL); 
195
196    if (allnodes && opt->wcoll)
197        errx("%p: Do not specify -a with other node selection options\n");
198
199    return _sdr_wcoll (global);
200}
201
202static int sdr_postop (opt_t *opt)
203{
204    hostlist_t hl; 
205
206    if (!verify && !altnames)
207        return (0);
208
209    if (!opt->wcoll || (hostlist_count (opt->wcoll) == 0))
210        return (0);
211
212    if (!sdr_initialized)
213        _sdr_getnames (global);
214
215    if (verify) 
216        _sdr_getresp (global);
217
218    hl = _sdr_filter (opt->wcoll, altnames, verify);
219    hostlist_destroy (opt->wcoll);
220    opt->wcoll = hl;
221
222    return (0);
223}
224
225/*
226 * Other functions
227 */
228
229
230/*
231 * Get the wcoll from the SDR.  
232 *      Gopt (IN)       pass -G to SDRGetObjects
233 *      RETURN          new list containing hostnames (reliable by default)
234 */
235static hostlist_t _sdr_wcoll (bool Gopt)
236{
237	/*
238	 *  Cache SDR reliable and initial hostnames
239	 */
240	_sdr_getnames (Gopt);
241
242    return _sdr_reliable_names ();
243}
244
245
246/*
247 * Filter hostlist `hl' using SDR attributes.
248 *     iopt     convert reliable hostnames to initial and vice versa.
249 *     verify   remove hosts that are not responding on the corresponding
250 *              interface (i.e. switch for initial hostnames, eth otherwise)
251 *     RETURN   new list containing filtered hosts.
252 */
253static hostlist_t _sdr_filter (hostlist_t hl, bool iopt, bool verify)
254{
255    char *host = NULL;
256    hostlist_t new  = hostlist_create (NULL);
257    hostlist_iterator_t i = hostlist_iterator_create (hl);
258    struct sdr_info *s = NULL;
259
260    while ((host = hostlist_next (i))) {
261        int r = 0;
262
263        if ((s = _find_node (host, &r)) == NULL) {
264            hostlist_push_host (new, host);
265            continue;
266        }
267
268        if (iopt) 
269            r = !r;
270
271        if (!verify || (r ? s->host_responds : s->switch_responds))
272           hostlist_push_host (new, r ? s->reliable_hostname : s->hostname);
273
274        free (host);
275    }
276
277    hostlist_iterator_destroy (i);
278
279    return (new);
280}
281
282static void _sdr_getswitchname(char *switchName, int len)
283{
284    FILE *f;
285    List words;
286    char cmd[LINEBUFSIZE];
287    char buf[LINEBUFSIZE];
288
289    snprintf(cmd, sizeof(cmd), "%s -x Switch switch_number==1 switch_name",
290             _PATH_SDRGETOBJECTS);
291    f = xpopen(cmd, "r");
292    if (f == NULL)
293        errx("%p: error running %s\n", _PATH_SDRGETOBJECTS);
294    while (fgets(buf, LINEBUFSIZE, f) != NULL) {
295        words = list_split(NULL, buf);
296        assert(list_count(words) == 1);
297        snprintf(switchName, len, _list_nth(words, 0));
298        list_destroy(words);
299    }
300    xpclose(f);
301}
302
303static char * _sdr_switch_attr (int *numswitchplanes)
304{
305    FILE *f;
306    List words;
307    char cmd[LINEBUFSIZE];
308    char buf[LINEBUFSIZE];
309    int n;
310
311	static char * attr[] = {
312		"switch_responds",
313		"switch_responds0",
314		"switch_responds0 switch_responds1"
315	};
316
317	_sdr_getswitchname(buf, sizeof(buf));
318	if (strcmp(buf, "SP_Switch2") != 0) {
319		*numswitchplanes = 1;
320		return (attr[0]);
321	}
322 
323    snprintf(cmd, sizeof(cmd), "%s -x SP number_switch_planes",
324             _PATH_SDRGETOBJECTS);
325
326    f = xpopen(cmd, "r");
327
328    if (f == NULL)
329        errx("%p: error running %s\n", _PATH_SDRGETOBJECTS);
330    while (fgets(buf, LINEBUFSIZE, f) != NULL) {
331        words = list_split(NULL, buf);
332        assert(list_count(words) == 1);
333        n = atoi(_list_nth(words, 0));
334        list_destroy(words);
335    }
336    if (xpclose(f) != 0)
337        err("%p: nonzero return code from %s\n", _PATH_SDRGETOBJECTS);
338
339	*numswitchplanes = n;
340	return (attr[n]);
341}
342
343static void _sdr_cache_hresp_line (char *buf)
344{
345	List words = NULL;
346	int  nn    = -1;
347
348	words = list_split (NULL, buf);
349	assert(list_count (words) == 2);
350
351	nn = atoi (_list_nth (words, 0));
352	assert (nn >= 0 && nn <= MAX_SP_NODE_NUMBER);
353    /* 
354     * Ignore host_responds for hosts without node information
355     */
356    if (sdrcache[nn] != NULL)
357        sdrcache[nn]->host_responds = (atoi (_list_nth (words, 1)) == 1);
358
359	return;
360}
361
362static void _sdr_cache_sresp_line (char *buf, int switchplanes)
363{
364	List words = NULL;
365	int  nn    = -1;
366	struct sdr_info *s;
367
368	words = list_split (NULL, buf);
369	assert(list_count (words) == (1 + switchplanes));
370
371	nn = atoi (_list_nth (words, 0));
372	assert (nn >= 0 && nn <= MAX_SP_NODE_NUMBER);
373	assert (sdrcache[nn] != NULL);
374
375	s = sdrcache[nn];
376
377	s->switch_responds = (atoi(_list_nth(words, 1)) == 1);
378	if (switchplanes == 2)
379		s->switch_responds = s->switch_responds || (atoi(_list_nth (words, 1)));
380
381	return;
382}
383
384static void _sdr_cache_name_line (char *buf)
385{
386	char *name  = NULL;
387	char *rname = NULL;
388	List words  = NULL;
389	int  nn     = -1; 
390    char *p;
391
392	words = list_split (NULL, buf);
393	assert (list_count(words) == 3);
394
395	nn = atoi (_list_nth (words, 0));
396	assert (nn >= 0 && nn <= MAX_SP_NODE_NUMBER);
397
398	name = _list_nth (words, 1);
399	rname = _list_nth (words, 2);
400
401	if ((p = strchr (name, '.')))
402		*p = '\0';
403
404	if ((p = strchr (rname, '.')))
405		*p = '\0';
406
407	sdrcache[nn] = sdr_info_create (name, rname);
408
409	list_destroy (words);
410	return;
411}
412
413static void _sdr_getresp (bool Gopt)
414{
415    FILE *f;
416    char cmd[LINEBUFSIZE];
417    char buf[LINEBUFSIZE];
418	int nswitchplanes;
419
420    snprintf (cmd, sizeof(cmd), 
421			 "%s %s -x host_responds node_number host_responds",
422             _PATH_SDRGETOBJECTS, Gopt ? "-G" : "");
423
424    if ((f = xpopen (cmd, "r")) == NULL)
425        errx("%p: error running %s\n", _PATH_SDRGETOBJECTS);
426
427    while (fgets (buf, LINEBUFSIZE, f) != NULL) 
428		_sdr_cache_hresp_line (buf);
429
430    snprintf (cmd, sizeof(cmd), 
431			 "%s %s -x switch_responds node_number %s",
432             _PATH_SDRGETOBJECTS, Gopt ? "-G" : "", 
433			 _sdr_switch_attr (&nswitchplanes));
434
435    if ((f = xpopen (cmd, "r")) == NULL)
436        errx("%p: error running %s\n", _PATH_SDRGETOBJECTS);
437
438    while (fgets (buf, LINEBUFSIZE, f) != NULL) 
439		_sdr_cache_sresp_line (buf, nswitchplanes);
440
441	xpclose (f);
442
443	return;
444}
445
446/*
447 * Query the SDR for hostnames of all nodes and return the results in an 
448 * array indexed by node number.
449 *      Gopt (IN)       pass -G to SDRGetObjects
450 */
451static void _sdr_getnames(bool Gopt)
452{
453    FILE *f;
454    char cmd[LINEBUFSIZE];
455    char buf[LINEBUFSIZE];
456
457    snprintf (cmd, sizeof(cmd), 
458			 "%s %s -x Node node_number initial_hostname reliable_hostname",
459             _PATH_SDRGETOBJECTS, Gopt ? "-G" : "");
460
461    if ((f = xpopen (cmd, "r")) == NULL)
462        errx("%p: error running %s\n", _PATH_SDRGETOBJECTS);
463
464    while (fgets (buf, LINEBUFSIZE, f) != NULL) 
465		_sdr_cache_name_line (buf);
466
467    xpclose(f);
468
469    sdr_initialized = true;
470}
471
472static hostlist_t _sdr_reliable_names ()
473{
474	hostlist_t hl = hostlist_create (NULL);
475	int i;
476
477	for (i = 0; i < MAX_SP_NODE_NUMBER; i++) {
478		if (sdrcache[i] != NULL)
479			hostlist_push_host (hl, sdrcache[i]->reliable_hostname);
480	}
481
482	return (hl);
483}
484
485static char *_list_nth(List l, int n)
486{
487    int i = 0;
488    char *name = NULL;
489    ListIterator itr = list_iterator_create(l);
490
491    while ((name = list_next(itr))) {
492        if (i++ == n) break;
493    }
494    list_iterator_destroy(itr);
495    return name;
496}
497
498static struct sdr_info * sdr_info_create (char *host, char *rhost)
499{
500	struct sdr_info *s = Malloc (sizeof (*s));
501
502	s->hostname = Strdup (host);
503	s->reliable_hostname = Strdup (rhost);
504
505	s->host_responds = false;
506	s->switch_responds = false;
507
508	return (s);
509}
510
511static void sdr_info_destroy (struct sdr_info *s)
512{
513    if (s == NULL)
514        return;
515
516	if (s->hostname)
517		Free ((void **) &s->hostname);
518	if (s->reliable_hostname)
519		Free ((void **) &s->reliable_hostname);
520
521	Free ((void **) &s);
522
523	return;
524}
525
526static struct sdr_info * _find_node (const char *name, int *rhost)
527{
528    int i;
529
530    for (i = 0; i < MAX_SP_NODE_NUMBER; i++) {
531        struct sdr_info *s = sdrcache[i];
532
533        if (s == NULL)
534            continue;
535
536        if (strncmp (name, s->reliable_hostname, 
537                     strlen (s->reliable_hostname)) == 0) {
538            if (rhost != NULL)
539                *rhost = 1;
540            return (s);
541        }
542
543        if (strncmp (name, s->hostname, strlen (s->hostname)) == 0) {
544            if (rhost != NULL)
545                *rhost = 0;
546            return (s);
547        }
548    }
549
550    return (NULL);
551}
552
553/*
554 * vi: tabstop=4 shiftwidth=4 expandtab
555 */