/util/qdo
Python | 238 lines | 175 code | 15 blank | 48 comment | 7 complexity | d79f660d960b770ffc31f90cd664b8d0 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, WTFPL
- #! /usr/bin/env python
- # Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are
- # met: redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer;
- # redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution;
- # neither the name of the copyright holders nor the names of its
- # contributors may be used to endorse or promote products derived from
- # this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- # Authors: Steve Reinhardt
- # Ali Saidi
- # Important!
- # This script expects a simple "$ " prompt. If your login shell does not
- # default to that prompt (plain sh does), you'll need to add something like
- # the following to your bashrc/bash_profile script:
- #if [ "$OAR_USER" = "xxxx" ]; then
- #    PS1='$ '
- #fi
- import sys
- import os
- import re
- import time
- import optparse
- import pexpect
progname = os.path.basename(sys.argv[0])


def _die(message):
    """Write *message* plus a newline to stderr and exit with status 1."""
    sys.stderr.write(message + "\n")
    sys.exit(1)


usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that the options
# of the wrapped command are passed through untouched.
optparser.allow_interspersed_args = False

# (flag, add_option keyword arguments), in the order they appear in --help.
_OPTION_SPECS = (
    ('-e', dict(dest='stderr_file',
                help='command stderr output file')),
    ('-o', dict(dest='stdout_file',
                help='command stdout output file')),
    ('-l', dict(dest='save_log', action='store_true',
                help='save oarsub output log file')),
    ('-N', dict(dest='job_name',
                help='oarsub job name')),
    ('-q', dict(dest='dest_queue',
                help='oarsub destination queue')),
    ('--qwait', dict(dest='oarsub_timeout', type='int',
                     help='oarsub queue wait timeout', default=30*60)),
    ('-t', dict(dest='cmd_timeout', type='int',
                help='command execution timeout', default=600*60)),
)
for _flag, _spec in _OPTION_SPECS:
    optparser.add_option(_flag, **_spec)

options, cmd = optparser.parse_args()

# A command to run is mandatory.
if not cmd:
    _die("%s: missing command" % progname)

# Defaulting the job name to the command name is left disabled: before
# enabling it, a check is needed that cmd[0] is a valid job name, else
# oarsub will die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount: rewrite only the
# leading component so the path is expressed relative to /n.
if cwd.startswith('/.automount/'):
    cwd = cwd.replace('/.automount/', '/n/', 1)

# The job is started in this directory, so it has to live under /n/poolfs.
if not cwd.startswith('/n/poolfs/'):
    _die("Error: current directory must be under /n/poolfs.")
# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """pexpect.spawn wrapper for driving an interactive Bourne/bash shell.

    The constructor spawns ``cmd``, waits for a first ``$ `` prompt, and then
    changes the prompt to the fixed string ``qdo$ `` so later commands can be
    matched reliably.  Everything matched by expect() is accumulated in
    ``full_output``.

    Relies on the module-level ``options`` (for ``oarsub_timeout``) and
    ``progname`` (for error messages).
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn ``cmd`` and set up the prompt; exit(1) on spawn failure
        or if no prompt appears within ``options.oarsub_timeout`` seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            # Reported on stderr like every other error in this script.
            sys.stderr.write("%s: %s\n" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands that should return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of expect that updates full_output too.

        Returns the index reported by pexpect.spawn.expect so callers can
        use this override exactly like the base method.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command, with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status reported by ``echo $?``.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the remote side."""
        # Use this session (self), not whatever module-level 'shell'
        # happens to be bound when the method is called.
        (_, status) = self.do_command('[ -d %s ]' % dirname,
                                      self.quick_timeout)
        return status == 0

    # Don't actually try to close it.. just wait until it closes by itself.
    # We can't actually kill the pid which is what close() is trying to do,
    # and if we call wait we could be in an unfortunate situation of it
    # printing input right as we call wait, so the input is never read and
    # the process never ends.
    def safe_close(self):
        """Give the child up to ~10 seconds to exit, then close the session.

        A failure of close() to terminate the child is reported on stderr
        instead of being raised.
        """
        # Poll once per second; the counter bounds the wait so a child that
        # never exits cannot hang this script forever.
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            count += 1
        try:
            self.close(force=False)
        except pexpect.ExceptionPexpect as exc:
            sys.stderr.write("%s: could not close session cleanly: %s\n"
                             % (progname, exc))
-
# Spawn the interactive pool job.
# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'oarsub -I'
    if options.job_name:
        shell_cmd += ' -n "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue
    # NOTE(review): the original indentation was lost; '-d' is assumed to
    # belong to the oarsub branch only (the ssh branch is disabled anyway).
    shell_cmd += ' -d %s' % cwd

shell = Shell(shell_cmd)

# Pessimistic defaults: the finally clause below reads 'status', so it must
# be bound even if an exception escapes before the command has been run.
status = 1
output = ''

try:
    # chdir to cwd; kept in separate names so a later failure is never
    # mistaken for the result of the user's command.
    (_, cd_status) = shell.do_command('cd ' + cwd)
    if cd_status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o') + 1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d') + 1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # Poll every 5 seconds, giving up after 90 seconds.
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %s seconds for %s" % (secs_waited, output_dir))

    # run command; the redirections are interpreted by the remote shell
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd),
                                            options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.')  # oarsub/ssh termination escape sequence
        shell.safe_close()
        status = 3

    if output:
        print(output)
finally:
    try:
        # end job
        if shell.isalive():
            shell.sendline('exit')
            shell.expect('Disconnected from OAR job .*')
            shell.safe_close()
    finally:
        # if there was an error, log the output even if not requested;
        # nested in finally so a failure while ending the job cannot
        # prevent the log from being written.
        if status != 0 or options.save_log:
            with open('qdo-log.' + str(os.getpid()), 'w') as log:
                log.write(shell.full_output)

del shell
sys.exit(status)