/tests/t1003-slurm.sh
Shell | 133 lines | 96 code | 13 blank | 24 comment | 10 complexity | a9ffa2d9824d6e73446f2dd434605cae MD5 | raw file
#!/bin/sh
#
#  Run tests of the SLURM module if slurm is available and there are
#   any currently running jobs, or if we can start a job.
#
#  Helpers such as test_have_prereq, test_expect_success, say_color and
#   test_done are expected to come from the sourced test-lib.sh.
#

test_description='slurm module'

. "${srcdir:-.}/test-lib.sh"

if ! test_have_prereq MOD_MISC_SLURM; then
    skip_all='skipping slurm tests, slurm module not available'
    test_done
fi

if ! squeue >/dev/null 2>&1; then
    skip_all='skipping slurm tests, slurm install not available'
    test_done
fi

#
#  Space-separated list of job ids created by this script; they are
#   cancelled in the clean-up step at the bottom of the file.
#
export KILLJOBIDS=""

#
#  Create a batch job and return the jobid or FAILED on stdout.
#
#  Arguments: passed through verbatim to sbatch (e.g. -N2).
#  Globals:   sets ID and count; appends ID to KILLJOBIDS.
#  Outputs:   the job id once the job reaches state "R", or the word
#             FAILED if submission failed or the job was not running
#             after roughly 30 seconds.
#
#  NOTE: when called as $(create_batch_job ...) the function runs in a
#   subshell, so the KILLJOBIDS update is not visible to the caller.
#   Callers that need clean-up must record the returned id themselves.
#
create_batch_job() {
    ID=$(printf '#!/bin/sh\nsleep 100\n' | sbatch "$@" |
        sed 's/Submitted batch job //')

    # sbatch failed or printed nothing: there is no job to wait for.
    if test -z "$ID"; then
        echo FAILED
        return 0
    fi

    # Poll once per second until the job is running, for at most 30 tries.
    count=0
    while test "$(squeue -j "$ID" -ho %t)" != "R" && test "$count" -lt 30; do
        sleep 1
        count=$((count + 1))
    done

    if test "$count" -ge 30; then
        # The caller never learns the id, so do not leave the job queued.
        scancel "$ID" >/dev/null 2>&1
        echo FAILED
    else
        KILLJOBIDS="$KILLJOBIDS $ID"
        echo "$ID"
    fi
}

#
#  Ensure slurm module is loaded (i.e. same as -M genders)
#
export PDSH_MISC_MODULES=slurm

JOBIDS=$(squeue -ho %i -trunning)
if [ -n "$JOBIDS" ]; then
    #
    #  There are already running jobs we can use for testing
    #
    JOBID=$(echo $JOBIDS | tr ' ' '\n' | head -1)
else
    #
    #  Need to create our own job
    #   (Only run if long tests were requested)
    #
    if ! test_have_prereq LONGTESTS; then
        skip_all='skipping slurm tests, run with --long or PDSH_TEST_LONG'
        test_done
    fi
    echo "Attempting to initiate slurm job" >&2
    JOBID=$(create_batch_job -N2)
    if test "$JOBID" = "FAILED"; then
        skip_all='skipping slurm tests, unable to run a job'
        test_done
    fi
    # The function ran inside $(...), so its own KILLJOBIDS update was
    # lost with the subshell; record the job here for final clean-up.
    KILLJOBIDS="$KILLJOBIDS $JOBID"
fi

#
#  Capture the nodes in job JOBID
#
NODES=$(squeue -ho %N -j "$JOBID")

test_expect_success 'slurm -j option works' '
    O=$(pdsh -j$JOBID -q | tail -1)
    if test "x$O" != "x$NODES"; then
        say_color error "Error: pdsh -j$JOBID selected nodes $O expected $NODES"
        squeue -hj $JOBID
        false
    fi
'
test_expect_success 'slurm module reads SLURM_JOBID if no wcoll set' '
    O=$(SLURM_JOBID=$JOBID pdsh -q | tail -1)
    if test "x$O" != "x$NODES"; then
        say_color error "Error: pdsh -j$JOBID selected nodes $O expected $NODES"
        squeue -hj $JOBID
        false
    fi

'
test_expect_success 'slurm -j all option works' '
    O1=$(pdsh -j all -q | tail -1)
    O2=$(pdsh -j$(squeue -ho %i -trunning | tr " \n" ,,) -q | tail -1)
    if ! test "$O1" = "$O2"; then
        say_color error "Error: pdsh -j all failed to select all allocated nodes"
        say_color error "a: $O1"
        say_color error "b: $O2"
        false
    fi
'

#
#  Start a one-node job, cancel it, wait for it to leave the completing
#   (CG) state, then check that "pdsh -j all" only lists its node if some
#   other running job is still allocated to that node.
#
test_expect_success LONGTESTS 'slurm -j all does not select completed jobs' '
    jobid=$(create_batch_job -N1) && test "$jobid" != "FAILED" &&
    node=$(squeue -ho%N -j $jobid) &&
    scancel $jobid &&
    while test "$(squeue -j "$jobid" -ho %t)" = "CG"; do sleep 0.2; done &&
    if pdsh -j all -Q | tail -1 | tr , "\n" | grep "^$node$"; then
        # squeue -w filters by node (-n would filter by job name).
        if ! squeue -trunning -w "$node" -ho %t | grep -q "^R$"; then
            say_color error "pdsh -j all selected node $node from completed job"
            false
        fi
    fi
'
test_expect_success 'slurm -j option handles illegal jobid gracefully' '
    pdsh -j garbage 2>&1 | grep -q "invalid setting"
'

test_expect_success 'slurm -P option works' '
    part=$(sinfo -ho %P | head -1)
    O1=$(sinfo -ho %N -p $part)
    O2=$(pdsh -P $part -q | tail -1)
    if test "x$O1" != "x$O2"; then
        say_color error "Error: pdsh -P $part selected nodes $O2 expected $O1"
        false
    fi
'

#
#  Clean up:
#   cancel any job this script started (KILLJOBIDS is intentionally
#   unquoted so that each id becomes a separate scancel argument).
#
echo "$KILLJOBIDS"
test -n "$KILLJOBIDS" && scancel $KILLJOBIDS

test_done