PageRenderTime 22ms CodeModel.GetById 13ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/tests/t1003-slurm.sh

https://code.google.com/
Shell | 133 lines | 96 code | 13 blank | 24 comment | 10 complexity | a9ffa2d9824d6e73446f2dd434605cae MD5 | raw file
  1#!/bin/sh
  2#
  3#  Run tests of the SLURM module if slurm is available and there are 
  4#   any currently running jobs, or if we can start a job.
  5#
  6
  7test_description='slurm module'
  8
  9. ${srcdir:-.}/test-lib.sh
 10
 11if ! test_have_prereq MOD_MISC_SLURM; then
 12	skip_all='skipping slurm tests, slurm module not available'
 13	test_done
 14fi
 15
 16if ! squeue >/dev/null 2>&1; then
 17	skip_all='skipping slurm tests, slurm install not available'
 18	test_done
 19fi
 20
 21export KILLJOBIDS=""
 22#
 23#  Create a batch job and return the jobid or FAILED on stdout
 24#
 25create_batch_job() {
 26	ID=$(printf '#!/bin/sh\nsleep 100\n'|sbatch "$@" |sed 's/Submitted batch job //')
 27	count=0
 28    while test "$(squeue -j $ID -ho %t)" != "R" && $count -lt 30; do
 29        sleep 1;
 30        $((count=count+1))
 31    done
 32	if test "$count" -ge 30; then
 33		echo FAILED
 34	else
 35		KILLJOBIDS="$KILLJOBIDS $JOBID"
 36		echo $ID
 37	fi
 38}
 39
 40#
 41#  Ensure slurm module is loaded (i.e. same as -M genders)
 42#
 43export PDSH_MISC_MODULES=slurm
 44
 45JOBIDS=$(squeue -ho %i -trunning)
 46if [ -n "$JOBIDS" ]; then
 47   #
 48   #  There are already running jobs we can use for testing
 49   #
 50   JOBID=$(echo $JOBIDS | tr ' ' '\n' | head -1)
 51else
 52   #
 53   #  Need to create our own job
 54   #   (Only run if long tests were requested)
 55   #
 56   if !	test_have_prereq LONGTESTS; then
 57      skip_all='skipping slurm tests, run with --long or PDSH_TEST_LONG'
 58	  test_done
 59   fi
 60   echo "Attempting to initiate slurm job" >&2
 61   JOBID=$(create_batch_job -N2)
 62   if test "$JOBID" = "FAILED"; then
 63      skip_all='skipping slurm tests, unable to run a job'
 64	  test_done
 65   fi
 66fi
 67
 68#
 69#  Capture the nodes in job JOBID
 70#
 71NODES=$(squeue -ho %N -j $JOBID)
 72
 73test_expect_success 'slurm -j option works' '
 74	O=$(pdsh -j$JOBID -q | tail -1)
 75	if test "x$O" != "x$NODES"; then
 76	   say_color error "Error: pdsh -j$JOBID selected nodes $O expected $NODES"
 77	   squeue -hj $JOBID 
 78	   false
 79    fi
 80'
 81test_expect_success 'slurm module reads SLURM_JOBID if no wcoll set' '
 82	O=$(SLURM_JOBID=$JOBID pdsh -q | tail -1)
 83	if test "x$O" != "x$NODES"; then
 84	   say_color error "Error: pdsh -j$JOBID selected nodes $O expected $NODES"
 85	   squeue -hj $JOBID 
 86	   false
 87    fi
 88
 89'
 90test_expect_success 'slurm -j all option works' '
 91	O1=$(pdsh -j all -q | tail -1)
 92	O2=$(pdsh -j$(squeue -ho %i -trunning | tr " \n" ,,) -q | tail -1)
 93	if ! test "$O1" = "$O2"; then
 94	   say_color error "Error: pdsh -j all failed to select all allocated nodes"
 95	   say_color error "a: $O1"
 96	   say_color error "b: $O2"
 97	   false
 98	fi
 99'
100
#  Start a one-node job, cancel it, wait until squeue no longer reports it
#   in state CG, then check that "-j all" does not pick its node
#   (unless squeue still reports a running job for that node).
#  Every setup step is chained with && so that a failure to create or
#   cancel the job fails the test instead of being silently ignored.
test_expect_success LONGTESTS 'slurm -j all does not select completed jobs' '
	jobid=$(create_batch_job -N1) &&
	test "$jobid" != "FAILED" &&
	node=$(squeue -ho%N -j "$jobid") &&
	scancel "$jobid" &&
	while test "$(squeue -j "$jobid" -ho %t)" = "CG"; do
		sleep 0.2
	done &&
	if pdsh -j all -Q | tail -n 1 | tr , "\n" | grep "^$node$"; then
		if test "$(squeue -trunning -n$node -ho%t)" != "R"; then
			say_color error "pdsh -j all selected node $node from completed job"
			false
		fi
	fi
'
#  A non-numeric job id must produce an "invalid setting" diagnostic.
test_expect_success 'slurm -j option handles illegal jobid gracefully' '
	pdsh -j garbage 2>&1 |
		grep -q "invalid setting"
'
116
#  pdsh -P <partition> must select the nodes sinfo lists for that partition.
#  sinfo marks the default partition by appending "*" to the %P field;
#   strip it so the bare partition name is handed to sinfo -p and pdsh -P.
test_expect_success 'slurm -P option works' '
	part=$(sinfo -ho %P | head -n 1 | sed "s/[*]\$//")
	O1=$(sinfo -ho %N -p "$part")
	O2=$(pdsh -P "$part" -q | tail -n 1)
	if test "x$O1" != "x$O2"; then
		say_color error "Error: pdsh -P $part selected nodes $O2 expected $O1"
		false
	fi
'
126
#
#  Clean up: print the list of job ids collected in KILLJOBIDS and,
#   if it is non-empty, cancel them.
#
echo "$KILLJOBIDS"
if test -n "$KILLJOBIDS"; then
	# Unquoted on purpose: each job id must become its own argument.
	scancel $KILLJOBIDS
fi

test_done