PageRenderTime 45ms CodeModel.GetById 15ms app.highlight 26ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/check_galaxy.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 401 lines | 392 code | 0 blank | 9 comment | 13 complexity | a2dbbfc279f5d876091d73f3f38c6188 MD5 | raw file
#!/usr/bin/env python
"""
check_galaxy can be run by hand, although it is meant to run from cron
via the check_galaxy.sh script in Galaxy's cron/ directory.
"""
  6
  7import socket, sys, os, time, tempfile, filecmp, htmllib, formatter, getopt
  8from user import home
  9
 10# options
 11if os.environ.has_key( "DEBUG" ):
 12    debug = os.environ["DEBUG"]
 13else:
 14    debug = False
 15scripts_dir = os.path.abspath( os.path.dirname( sys.argv[0] ) )
 16test_data_dir = os.path.join( scripts_dir, "..", "test-data" )
 17# what tools to run - not so pretty
 18tools = {
 19    "gops_intersect_1" :
 20    [
 21        {
 22            "inputs" :
 23            (
 24                os.path.join( test_data_dir, "1.bed" ),
 25                os.path.join( test_data_dir, "2.bed" )
 26            )
 27        },
 28        { "check_file" : os.path.join( test_data_dir, "gops_intersect_out.bed" ) },
 29        {
 30            "tool_run_options" :
 31            {
 32                "input1" : "1.bed",
 33                "input2" : "2.bed",
 34                "min" : "1",
 35                "returntype" : ""
 36            }
 37        }
 38    ]
 39}
 40
 41# handle arg(s)
 42def usage():
 43    print "usage: check_galaxy.py <server>"
 44    sys.exit(1)
 45
 46try:
 47    opts, args = getopt.getopt( sys.argv[1:], 'n' )
 48except getopt.GetoptError, e:
 49    print str(e)
 50    usage()
 51if len( args ) < 1:
 52    usage()
 53server = args[0]
 54if server.endswith(".g2.bx.psu.edu"):
 55    if debug:
 56        print "Checking a PSU Galaxy server, using maint file"
 57    maint = "/errordocument/502/%s/maint" % args[0].split('.', 1)[0]
 58else:
 59    maint = None
 60new_history = False
 61for o, a in opts:
 62    if o == "-n":
 63        if debug:
 64            print "Specified -n, will create a new history"
 65        new_history = True
 66    else:
 67        usage()
 68
 69# state information
 70var_dir = os.path.join( home, ".check_galaxy", server )
 71if not os.access( var_dir, os.F_OK ):
 72    os.makedirs( var_dir, 0700 )
 73
 74# get user/pass
 75login_file = os.path.join( var_dir, "login" )
 76try:
 77    f = open( login_file, 'r' )
 78except:
 79    print "Please create the file:"
 80    print " ", login_file
 81    print "This should contain a username and password to log in to"
 82    print "Galaxy with, on one line, separated by whitespace, e.g.:"
 83    print ""
 84    print "check_galaxy@example.com password"
 85    print ""
 86    print "If the user does not exist, check_galaxy will create it"
 87    print "for you."
 88    sys.exit(1)
 89( username, password ) = f.readline().split()
 90
 91# find/import twill
 92lib_dir = os.path.join( scripts_dir, "..", "lib" )
 93sys.path.append( lib_dir )
 94from galaxy import eggs
 95import pkg_resources
 96pkg_resources.require( "twill" )
 97import twill
 98import twill.commands as tc
 99
100# default timeout for twill browser is never
101socket.setdefaulttimeout(300)
102
103# user-agent
104tc.agent("Mozilla/5.0 (compatible; check_galaxy/0.1)")
105tc.config('use_tidy', 0)
106
class Browser:
    """Drive a twill browser session against the Galaxy server under test.

    The instance reads the module-level ``server``, ``maint`` and ``var_dir``
    values at construction time and keeps the state of the tool run that is
    currently being checked:

    tool        id of the tool to run
    tool_opts   form field name -> value for the tool form
    id          id of the top history item (set by check_status)
    status      state of the top history item (set by check_status)
    check_file  path of the file holding the expected tool output
    hid         initialised to None; not used elsewhere in this class
    cookie_jar  path of the file used to persist twill's cookies
    """

    def __init__(self):
        self.server = server
        self.maint = maint
        self.tool = None
        self.tool_opts = None
        self.id = None
        self.status = None
        self.check_file = None
        self.hid = None
        self.cookie_jar = os.path.join( var_dir, "cookie_jar" )
        dprint("cookie jar path: %s" % self.cookie_jar)
        if not os.access(self.cookie_jar, os.R_OK):
            # Write a jar first so that the load below has a file to read.
            dprint("no cookie jar at above path, creating")
            tc.save_cookies(self.cookie_jar)
        tc.load_cookies(self.cookie_jar)

    def get(self, path):
        """Fetch http://<server><path> and assert that the response code is 200."""
        tc.go("http://%s%s" % (self.server, path))
        tc.code(200)

    def reset(self):
        """Clear the per-tool state and empty the current history.

        Raises Exception if datasets are still listed after deleting them.
        When the script was started with -n the current history is deleted
        as well and the cookies are saved.
        """
        self.tool = None
        self.tool_opts = None
        self.id = None
        self.status = None
        self.check_file = None
        self.delete_datasets()
        self.get("/root/history")
        p = didParser()
        p.feed(tc.browser.get_html())
        if len(p.dids) > 0:
            print("Remaining datasets ids: %s" % " ".join( p.dids ))
            raise Exception("History still contains datasets after attempting to delete them")
        if new_history:
            self.get("/history/delete_current")
            tc.save_cookies(self.cookie_jar)

    def check_redir(self, url):
        """Return True if *url* answers with a 302 redirect, False otherwise.

        If the response code is 502 the script exits instead of returning:
        status 0 when a maint file is found, status 1 (after printing an
        alert) when it is not.
        """
        try:
            try:
                tc.get_browser()._browser.set_handle_redirect(False)
                tc.go(url)
                tc.code(302)
            finally:
                # Always turn redirect handling back on, even when an
                # unexpected (non-assertion) error escapes from twill.
                tc.get_browser()._browser.set_handle_redirect(True)
            dprint( "%s is returning redirect (302)" % url )
            return True
        except twill.errors.TwillAssertionError as e:
            dprint( "%s is not returning redirect (302): %s" % (url, e) )
            code = tc.browser.get_code()
            if code == 502:
                if self.check_maint():
                    dprint( "Galaxy is down, but a maint file was found, so not sending alert" )
                    sys.exit(0)
                print("Galaxy is down (code 502)")
                sys.exit(1)
            return False

    # checks for a maint file
    def check_maint(self):
        """Return True if the maint path can be fetched with a 200 response.

        Returns False when no maint path is configured for this server or
        when fetching it fails twill's response-code assertion.
        """
        if self.maint is None:
            #dprint( "Warning: unable to check maint file for %s" % self.server )
            return False
        try:
            self.get(self.maint)
            return True
        except twill.errors.TwillAssertionError:
            return False

    def login(self, user, pw):
        """Log in as *user*; create the account if Galaxy reports no such user.

        Raises Exception when the password is rejected or when the failure
        cannot be classified.  The cookies are saved afterwards.
        """
        self.get("/user/login")
        tc.fv("1", "email", user)
        tc.fv("1", "password", pw)
        tc.submit("Login")
        tc.code(200)
        if len(tc.get_browser().get_all_forms()) > 0:
            # A form is still present on the page: the login did not succeed.
            p = userParser()
            p.feed(tc.browser.get_html())
            if p.no_user:
                dprint("user does not exist, will try creating")
                self.create_user(user, pw)
            elif p.bad_pw:
                raise Exception("Password is incorrect")
            else:
                raise Exception("Unknown error logging in")
        tc.save_cookies(self.cookie_jar)

    def create_user(self, user, pw):
        """Register *user* with password *pw* through the /user/create form.

        Raises Exception if Galaxy reports that the user already exists.
        NOTE(review): any other failure that leaves a form on the page is
        not reported here -- confirm that this is intended.
        """
        self.get("/user/create")
        tc.fv("1", "email", user)
        tc.fv("1", "password", pw)
        tc.fv("1", "confirm", pw)
        tc.submit("Submit")
        tc.code(200)
        if len(tc.get_browser().get_all_forms()) > 0:
            p = userParser()
            p.feed(tc.browser.get_html())
            if p.already_exists:
                raise Exception('The user you were trying to create already exists')

    def upload(self, file):
        """Upload the local file *file* with the upload1 tool, typed as bed."""
        self.get("/tool_runner/index?tool_id=upload1")
        tc.fv("1","file_type", "bed")
        tc.formfile("1","file_data", file)
        tc.submit("runtool_btn")
        tc.code(200)

    def runtool(self):
        """Open the form of self.tool, fill in self.tool_opts and submit it."""
        self.get("/tool_runner/index?tool_id=%s" % self.tool)
        for k, v in self.tool_opts.items():
            tc.fv("1", k, v)
        tc.submit("runtool_btn")
        tc.code(200)

    def wait(self):
        """Poll the history page until it no longer reports a running job.

        The page is fetched up to 16 times, sleeping 1, 2, 3, ... seconds
        after each poll that still shows the running marker.  Raises
        Exception only if the marker is still present on the last poll; a
        job that finishes exactly on the last poll counts as finished.
        """
        running_marker = '<!-- running: do not change this comment, used by TwillTestCase.wait -->'
        sleep_amount = 1
        maxiter = 16
        for _ in range(maxiter):
            self.get("/root/history")
            page = tc.browser.get_html()
            if running_marker not in page:
                return
            time.sleep( sleep_amount )
            sleep_amount += 1
        raise Exception("Tool never finished")

    def check_status(self):
        """Record id and status of the top history item; raise unless it is "ok"."""
        self.get("/root/history")
        p = historyParser()
        p.feed(tc.browser.get_html())
        if p.status != "ok":
            raise Exception("JOB %s NOT OK: %s" % (p.id, p.status))
        self.id = p.id
        self.status = p.status

    def diff(self):
        """Download dataset self.id and compare it with self.check_file.

        Raises Exception if the contents differ.  The temporary download is
        removed on every exit path unless debug is enabled, in which case it
        is kept for inspection.
        """
        self.get("/datasets/%s/display/display?to_ext=bed" % self.id)
        data = tc.browser.get_html()
        fd, tmp_path = tempfile.mkstemp()
        dprint("tmp file: %s" % tmp_path)
        try:
            tmpfh = os.fdopen(fd, 'w')
            try:
                tmpfh.write(data)
            finally:
                tmpfh.close()
            # shallow=False forces a byte-for-byte comparison rather than
            # trusting matching os.stat() signatures.
            if not filecmp.cmp(tmp_path, self.check_file, shallow=False):
                raise Exception("Tool output differs from expected")
            dprint("Tool output is as expected")
        finally:
            if not debug:
                os.remove(tmp_path)

    def delete_datasets(self):
        """Request deletion of every dataset listed on the history page."""
        self.get("/root/history")
        p = didParser()
        p.feed(tc.browser.get_html())
        for did in p.dids:
            self.get("/datasets/%s/delete" % did)

    def check_if_logged_in(self):
        """Return True if the user page reports an active login."""
        self.get("/user?cntrller=user")
        p = loggedinParser()
        p.feed(tc.browser.get_html())
        return p.logged_in
281
class userParser(htmllib.HTMLParser):
    """Detect Galaxy's login / registration error messages in a page.

    After feed(), one of the flags below is True when the matching message
    was seen as the exact text of a data chunk inside a <span> or <div>:

    no_user         "No such user (please note that login is case sensitive)"
    bad_pw          "Invalid password"
    already_exists  "User with that email already exists"
    """
    def __init__(self):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        # in_span / in_div are plain on/off flags, not nesting counters.
        self.in_span = False
        self.in_div = False
        self.no_user = False
        self.bad_pw = False
        self.already_exists = False
    def start_span(self, attrs):
        self.in_span = True
    def start_div(self, attrs):
        self.in_div = True
    def end_span(self):
        self.in_span = False
    def end_div(self):
        self.in_div = False
    def handle_data(self, data):
        # Only text seen while inside a span or div is considered.
        if self.in_span or self.in_div:
            if data == "No such user (please note that login is case sensitive)":
                self.no_user = True
            elif data == "Invalid password":
                self.bad_pw = True
            elif data == "User with that email already exists":
                self.already_exists = True
306
class historyParser(htmllib.HTMLParser):
    """Extract the id and status of a history item from the history page.

    status  text following "historyItemWrapper historyItem historyItem-" in
            a <div> class attribute, or None if no such div was seen
    id      text following "historyItem-" in a <div> id attribute, or None
    """
    def __init__(self):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        self.status = None
        self.id = None
    def start_div(self, attrs):
        # find the top history item
        for i in attrs:
            if i[0] == "class" and i[1].startswith("historyItemWrapper historyItem historyItem-"):
                self.status = i[1].rsplit("historyItemWrapper historyItem historyItem-", 1)[1]
                dprint("status: %s" % self.status)
            if i[0] == "id" and i[1].startswith("historyItem-"):
                self.id = i[1].rsplit("historyItem-", 1)[1]
                dprint("id: %s" % self.id)
        if self.status is not None:
            # NOTE(review): reset() is presumably called here to make the
            # parser stop after the first (top) item so later items do not
            # overwrite status/id -- confirm against sgmllib's behaviour.
            self.reset()
323
class didParser(htmllib.HTMLParser):
    """Collect dataset ids from the history page.

    After feed(), ``dids`` holds, in document order, the text following
    "historyItemContainer-" in the id attribute of every matching <div>.
    """
    def __init__(self):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        self.dids = []
    def start_div(self, attrs):
        for i in attrs:
            if i[0] == "id" and i[1].startswith("historyItemContainer-"):
                self.dids.append( i[1].rsplit("historyItemContainer-", 1)[1] )
                dprint("got a dataset id: %s" % self.dids[-1])
333
class loggedinParser(htmllib.HTMLParser):
    """Decide from the user page whether the session is logged in.

    ``logged_in`` starts as False and is updated from text found inside
    <p> elements: it becomes True for text starting with
    "You are currently logged in as " and False for the exact text
    "You are currently not logged in.".
    """
    def __init__(self):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        self.in_p = False
        self.logged_in = False
    def start_p(self, attrs):
        self.in_p = True
    def end_p(self):
        self.in_p = False
    def handle_data(self, data):
        if self.in_p:
            if data == "You are currently not logged in.":
                self.logged_in = False
            elif data.startswith( "You are currently logged in as " ):
                self.logged_in = True
349
def dprint(str):
    """Print *str* to stdout only when the module-level ``debug`` flag is truthy."""
    if debug:
        print(str)
353
# do stuff here
# Log in, then for every configured tool: clean the history, upload the
# inputs, run the tool, wait for it, verify status and output, and clean up.
if __name__ == "__main__":

    dprint("checking %s" % server)

    b = Browser()

    # login (or not)
    if b.check_if_logged_in():
        dprint("we are already logged in (via cookies), hooray!")
    else:
        dprint("not logged in... logging in")
        b.login(username, password)

    for tool, params in tools.items():

        # make sure history and state is clean
        b.reset()
        b.tool = tool

        # get all the tool run conditions
        for step in params:
            for k, v in step.items():
                if k == 'inputs':
                    for input_path in v:
                        b.upload(input_path)
                elif k == 'check_file':
                    b.check_file = v
                elif k == 'tool_run_options':
                    b.tool_opts = v
                else:
                    raise Exception("Unknown key in tools dict: %s" % k)

        b.runtool()
        b.wait()
        b.check_status()
        b.diff()
        b.delete_datasets()

        # by this point, everything else has succeeded.  there should be no maint.
        is_maint = b.check_maint()
        if is_maint:
            print("Galaxy is up and fully functional, but a maint file is in place.")
            sys.exit(1)

    sys.exit(0)