PageRenderTime 20ms CodeModel.GetById 11ms app.highlight 6ms RepoModel.GetById 2ms app.codeStats 0ms

/cron/check_galaxy.sh

https://bitbucket.org/cistrome/cistrome-harvard/
Shell | 222 lines | 188 code | 17 blank | 17 comment | 37 complexity | 0fbb17ffe6cbf77ad54cc325d109816e MD5 | raw file
  1#!/bin/sh
  2#set -xv
  3#
  4# Runs the scripts/check_galaxy.py script in a way that's easy to handle from cron
  5#
  6
  7# defaults (note: default sleep is below since it depends on debug)
  8DEBUG=0
  9STAGGER=0
 10INTERVAL=3
 11MAIL=
 12PAGE=
 13NEWHIST=
 14BARDARG=0
 15# get commandline opts
 16while getopts dsi:l:m:p:n optname
 17do
 18    case $optname in
 19        d)  DEBUG=1 ;;
 20        s)  STAGGER=1 ;;
 21        i)  INTERVAL=$OPTARG ;;
 22        l)  SLEEP=$OPTARG ;;
 23        m)  MAIL="$MAIL $OPTARG" ;;
 24        p)  PAGE="$PAGE $OPTARG" ;;
 25        n)  NEWHIST="-n" ;;
 26        *)  BADARG=1 ;;
 27    esac
 28done
 29shift `expr $OPTIND - 1`
 30
 31if [ -z "$1" -o "$BADARG" ]; then
 32    cat <<EOF
 33usage: `basename $0` [-ds] [-i interval] [-m email_address]+ [-p pager_address]+ <galaxy_host>"
 34  -d            Print debugging information.
 35  -s            Stagger mailing the pagers/emails, instead of all at once when
 36                there's a problem.  Useful for running check_galaxy at night.
 37  -i <interval> The number of times this wrapper should execute before mailing
 38                the next address, when staggering is enabled.  Mail is sent
 39                every <interval> runs of the program, so the actual time
 40                between emails is:
 41                  time = (<interval>) * (how often wrapper runs from cron)
 42  -l <seconds>  This wrapper runs check_galaxy a second time if the first check
 43                fails, in case the problem is intermittent.  <seconds> is how
 44                many seconds to sleep between checks.
 45  -m <address>  Email addresses to send the full check_galaxy output to, if
 46                Galaxy is down.  Use multiple -m options to specify multiple
 47                addresses.  When staggering, email will be sent in the order
 48                which you specify -m options on the command line.
 49  -p <address>  Like -m, but sends just the last line of check_galaxy's output.
 50                Useful for pagers.  When staggering is enabled and both -m and
 51                -p options are present, the first -m address and the first -p
 52                address are mailed simultaneously, followed by the second -m
 53                and second -p, and so on.
 54  -n            Create a new history (passes the -n option to check_galaxy.py).
 55  <galaxy_host> The hostname of the Galaxy server to check.  Use a : if running
 56                on a non-80 port (e.g. galaxy.example.com:8080).
 57EOF
 58    exit 1
 59fi
 60
 61if [ -z "$SLEEP" ]; then
 62    if [ $DEBUG ]; then
 63        SLEEP=2
 64    else
 65        SLEEP=60
 66    fi
 67fi
 68
 69# globals
 70CRON_DIR=`dirname $0`
 71SCRIPTS_DIR="$CRON_DIR/../scripts"
 72CHECK_GALAXY="$SCRIPTS_DIR/check_galaxy.py"
 73VAR="$HOME/.check_galaxy"
 74
 75# sanity
 76if [ ! -f $CHECK_GALAXY ]; then
 77    [ $DEBUG = 1 ] && echo "$CHECK_GALAXY is missing"
 78    exit 0
 79fi
 80
 81# Do any other systems' default ps not take BSD ps args?
 82case `uname -s` in
 83    SunOS)  PS="/usr/ucb/ps" ;;
 84    *)      PS="ps" ;;
 85esac
 86
 87NOTIFIED_MAIL="$VAR/$1/mail"
 88NOTIFIED_PAGE="$VAR/$1/page"
 89MUTEX="$VAR/$1/wrap.mutex"
 90COUNT="$VAR/$1/wrap.count"
 91STAGGER_FILE="$VAR/$1/wrap.stagger"
 92for dir in $VAR/$1 $NOTIFIED_MAIL $NOTIFIED_PAGE; do
 93    if [ ! -d $dir ]; then
 94        mkdir -p -m 0700 $dir
 95        if [ $? -ne 0 ]; then
 96            [ $DEBUG = 1 ] && echo "unable to create dir: $dir"
 97            exit 0
 98        fi
 99    fi
100done
101
# The per-host login file must exist before a check is attempted.  When it is
# missing, exit 0; the setup instructions are printed only in debug mode.
[ -f "$VAR/$1/login" ] || {
    if [ "$DEBUG" = 1 ]; then
        cat <<EOF
Please create the file:
  $VAR/$1/login
This should contain a username and password to log in to
Galaxy with, on one line, separated by whitespace, e.g.:

check_galaxy@example.com password

If the user does not exist, check_galaxy will create it
for you.
EOF
    fi
    exit 0
}

# When staggering (-s), load the run counter left by the previous failing run.
# With no stagger file the counter starts at INTERVAL, which makes the first
# failure eligible for notification right away.
if [ "$STAGGER" = 1 ]; then
    if [ -f "$STAGGER_FILE" ]; then
        STAGGER_COUNT=`cat "$STAGGER_FILE"`
        # An empty or corrupt file would break the numeric comparison later on;
        # treat it like a missing file.
        case "$STAGGER_COUNT" in
            ''|*[!0-9]*) STAGGER_COUNT=$INTERVAL ;;
        esac
    else
        STAGGER_COUNT=$INTERVAL
    fi
fi

# only run one at once
# The mutex file holds the PID of the run that created it.  If that process is
# still around this run backs off; otherwise the mutex is stale and is replaced.
if [ -f "$MUTEX" ]; then
    pid=`cat "$MUTEX"`
    # BSD-style "ps p <pid>"; its exit status decides whether the recorded
    # process is treated as still running.
    if "$PS" p "$pid" >/dev/null 2>&1; then
        if [ -f "$COUNT" ]; then
            count=`cat "$COUNT"`
        else
            count=0
        fi
        # An empty or corrupt counter file would break -eq and expr below.
        case "$count" in
            ''|*[!0-9]*) count=0 ;;
        esac
        # Warn once, on the run where the counter reads exactly 3, and only if
        # there is somebody to warn (no -m given means MAIL is empty).
        if [ "$count" -eq 3 ] && [ -n "$MAIL" ]; then
            # MAIL is left unquoted on purpose: it is a space-separated address list.
            echo "A check_galaxy process for $1 has been running for an unusually long time.  Something is broken." \
                | mail -s "$1 problems" $MAIL
        fi
        expr "$count" + 1 > "$COUNT"
        exit 0
    else
        # stale mutex
        rm -f "$MUTEX"
    fi
fi

# This run owns the check now: reset the skip counter and record our PID.
rm -f "$COUNT"
echo $$ > "$MUTEX"

# First attempt.  Output is captured (stdout and stderr) so nothing reaches cron.
[ "$DEBUG" = 1 ] && echo "running first check"
# NEWHIST is left unquoted on purpose: it is either empty or the single word "-n".
first_try=`"$CHECK_GALAXY" $NEWHIST "$1" 2>&1`

if [ $? -ne 0 ]; then
    # if failure, wait and try again
    [ "$DEBUG" = 1 ] && echo "first check failed, sleeping $SLEEP seconds for second run"
    sleep "$SLEEP"
else
    # if successful: tell everyone who was notified of an outage that the host
    # is back, clear their marker files, release the mutex and finish.
    [ "$DEBUG" = 1 ] && echo "first check succeeded"
    # ${...:?} aborts instead of globbing from / should a directory variable be empty.
    for file in "${NOTIFIED_MAIL:?}"/* "${NOTIFIED_PAGE:?}"/*; do
        # an empty directory leaves the pattern unexpanded; skip anything
        # that is not an existing marker file
        [ -f "$file" ] || continue
        recip=`basename "$file"`
        echo "$1 is now okay" | mail -s "$1 OK" "$recip"
        rm -f "$file"
        [ "$DEBUG" = 1 ] && echo "up: mailed $recip"
    done
    rm -f "$MUTEX" "$STAGGER_FILE"
    exit 0
fi

# Second attempt, reached only when the first check failed.  Its captured
# output is what gets mailed if it fails as well.
[ "$DEBUG" = 1 ] && echo "running second check"
# NEWHIST is left unquoted on purpose: it is either empty or the single word "-n".
second_try=`"$CHECK_GALAXY" $NEWHIST "$1" 2>&1`

if [ $? -ne 0 ]; then
    [ "$DEBUG" = 1 ] && echo "second check failed"
    if [ "$STAGGER" = 1 ]; then
        # -ge rather than -eq: if the stored counter is already past INTERVAL
        # (for instance after -i was lowered) an equality test would never
        # match again and notifications would stop for good.
        if [ "$STAGGER_COUNT" -ge "$INTERVAL" ]; then
            # send notification this run
            echo 1 > "$STAGGER_FILE"
        else
            # don't send notification this run
            [ "$DEBUG" = 1 ] && echo "$1 is down, but it's not time to send an email.  STAGGER_COUNT was $STAGGER_COUNT"
            expr "$STAGGER_COUNT" + 1 > "$STAGGER_FILE"
            rm -f "$MUTEX"
            exit 0
        fi
    fi
    # Mail the full output to every address not yet notified.  When staggering,
    # stop after the first newly notified address.
    # MAIL and PAGE are left unquoted on purpose: space-separated address lists.
    for recip in $MAIL; do
        if [ ! -f "$NOTIFIED_MAIL/$recip" ]; then
            cat <<HERE | mail -s "$1 problems" "$recip"
$second_try
HERE
            touch "$NOTIFIED_MAIL/$recip"
            [ "$DEBUG" = 1 ] && echo "dn: mailed $recip"
            [ "$STAGGER" = 1 ] && break
        fi
    done
    # Pagers get only the last line of the output.
    for recip in $PAGE; do
        if [ ! -f "$NOTIFIED_PAGE/$recip" ]; then
            cat <<HERE | tail -1 | mail -s "$1 problems" "$recip"
$second_try
HERE
            touch "$NOTIFIED_PAGE/$recip"
            [ "$DEBUG" = 1 ] && echo "dn: mailed $recip"
            [ "$STAGGER" = 1 ] && break
        fi
    done
else
    # Recovered on the retry: tell everyone who was notified of an outage and
    # clear their marker files.
    [ "$DEBUG" = 1 ] && echo "second check succeeded"
    # ${...:?} aborts instead of globbing from / should a directory variable be empty.
    for file in "${NOTIFIED_MAIL:?}"/* "${NOTIFIED_PAGE:?}"/*; do
        # an empty directory leaves the pattern unexpanded; skip anything
        # that is not an existing marker file
        [ -f "$file" ] || continue
        recip=`basename "$file"`
        echo "$1 is now okay" | mail -s "$1 OK" "$recip"
        rm -f "$file"
        [ "$DEBUG" = 1 ] && echo "up: mailed $recip"
    done
    rm -f "$STAGGER_FILE"
fi

rm -f "$MUTEX"
exit 0