PageRenderTime 47ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/util/qdo

https://bitbucket.org/musleh123/ece565
Python | 238 lines | 175 code | 15 blank | 48 comment | 7 complexity | d79f660d960b770ffc31f90cd664b8d0 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, WTFPL
  1. #! /usr/bin/env python
  2. # Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
  3. # All rights reserved.
  4. #
  5. # Redistribution and use in source and binary forms, with or without
  6. # modification, are permitted provided that the following conditions are
  7. # met: redistributions of source code must retain the above copyright
  8. # notice, this list of conditions and the following disclaimer;
  9. # redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution;
  12. # neither the name of the copyright holders nor the names of its
  13. # contributors may be used to endorse or promote products derived from
  14. # this software without specific prior written permission.
  15. #
  16. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. #
  28. # Authors: Steve Reinhardt
  29. # Ali Saidi
  30. # Important!
  31. # This script expects a simple "$ " prompt. If you are using a shell other
  32. # than sh (which defaults to this prompt), you'll need to add something like
  33. # the following to your bashrc/bash_profile script:
  34. #if [ "$OAR_USER" = "xxxx" ]; then
  35. # PS1='$ '
  36. import sys
  37. import os
  38. import re
  39. import time
  40. import optparse
  41. import pexpect
  42. progname = os.path.basename(sys.argv[0])
  43. usage = "%prog [options] command [command arguments]"
  44. optparser = optparse.OptionParser(usage=usage)
  45. optparser.allow_interspersed_args=False
  46. optparser.add_option('-e', dest='stderr_file',
  47. help='command stderr output file')
  48. optparser.add_option('-o', dest='stdout_file',
  49. help='command stdout output file')
  50. optparser.add_option('-l', dest='save_log', action='store_true',
  51. help='save oarsub output log file')
  52. optparser.add_option('-N', dest='job_name',
  53. help='oarsub job name')
  54. optparser.add_option('-q', dest='dest_queue',
  55. help='oarsub destination queue')
  56. optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
  57. help='oarsub queue wait timeout', default=30*60)
  58. optparser.add_option('-t', dest='cmd_timeout', type='int',
  59. help='command execution timeout', default=600*60)
  60. (options, cmd) = optparser.parse_args()
  61. if cmd == []:
  62. print >>sys.stderr, "%s: missing command" % progname
  63. sys.exit(1)
  64. # If we want to do this, need to add check here to make sure cmd[0] is
  65. # a valid PBS job name, else oarsub will die on us.
  66. #
  67. #if not options.job_name:
  68. # options.job_name = cmd[0]
  69. cwd = os.getcwd()
  70. # Deal with systems where /n is a symlink to /.automount
  71. if cwd.startswith('/.automount/'):
  72. cwd = cwd.replace('/.automount/', '/n/', 1)
  73. if not cwd.startswith('/n/poolfs/'):
  74. print >>sys.stderr, "Error: current directory must be under /n/poolfs."
  75. sys.exit(1)
  76. # The Shell class wraps pexpect.spawn with some handy functions that
  77. # assume the thing on the other end is a Bourne/bash shell.
  78. class Shell(pexpect.spawn):
  79. # Regexp to match the shell prompt. We change the prompt to
  80. # something fixed and distinctive to make it easier to match
  81. # reliably.
  82. prompt_re = re.compile('qdo\$ ')
  83. def __init__(self, cmd):
  84. # initialize base pexpect.spawn object
  85. try:
  86. pexpect.spawn.__init__(self, cmd)
  87. except pexpect.ExceptionPexpect, exc:
  88. print "%s:" % progname, exc
  89. sys.exit(1)
  90. # full_output accumulates the full output of the session
  91. self.full_output = ""
  92. self.quick_timeout = 15
  93. # wait for a prompt, then change it
  94. try:
  95. self.expect('\$ ', options.oarsub_timeout)
  96. except pexpect.TIMEOUT:
  97. print >>sys.stderr, "%s: oarsub timed out." % progname
  98. self.kill(9)
  99. self.safe_close()
  100. sys.exit(1)
  101. self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')
  102. # version of expect that updates full_output too
  103. def expect(self, regexp, timeout = -1):
  104. pexpect.spawn.expect(self, regexp, timeout)
  105. self.full_output += self.before + self.after
  106. # Just issue a command and wait for the next prompt.
  107. # Returns a string containing the output of the command.
  108. def do_bare_command(self, cmd, timeout = -1):
  109. global full_output
  110. self.sendline(cmd)
  111. # read back the echo of the command
  112. self.readline()
  113. # wait for the next prompt
  114. self.expect(self.prompt_re, timeout)
  115. output = self.before.rstrip()
  116. return output
  117. # Issue a command, then query its exit status.
  118. # Returns a (string, int) tuple with the command output and the status.
  119. def do_command(self, cmd, timeout = -1):
  120. # do the command itself
  121. output = self.do_bare_command(cmd, timeout)
  122. # collect status
  123. status = int(self.do_bare_command("echo $?", self.quick_timeout))
  124. return (output, status)
  125. # Check to see if the given directory exists.
  126. def dir_exists(self, dirname):
  127. (output, status) = shell.do_command('[ -d %s ]' % dirname,
  128. self.quick_timeout)
  129. return status == 0
  130. # Don't actually try to close it.. just wait until it closes by itself
  131. # We can't actually kill the pid which is what it's trying to do, and if
  132. # we call wait we could be in an unfortunate situation of it printing input
  133. # right as we call wait, so the input is never read and the process never ends
  134. def safe_close(self):
  135. count = 0
  136. while self.isalive() and count < 10:
  137. time.sleep(1)
  138. self.close(force=False)
  139. # Spawn the interactive pool job.
  140. # Hack to do link on poolfs... disabled for now since
  141. # compiler/linker/library versioning problems between poolfs and
  142. # nodes. May never work since poolfs is x86-64 and nodes are 32-bit.
  143. if False and len(cmd) > 50:
  144. shell_cmd = 'ssh -t poolfs /bin/sh -l'
  145. print "%s: running %s on poolfs" % (progname, cmd[0])
  146. else:
  147. shell_cmd = 'oarsub -I'
  148. if options.job_name:
  149. shell_cmd += ' -n "%s"' % options.job_name
  150. if options.dest_queue:
  151. shell_cmd += ' -q ' + options.dest_queue
  152. shell_cmd += ' -d %s' % cwd
  153. shell = Shell(shell_cmd)
  154. try:
  155. # chdir to cwd
  156. (output, status) = shell.do_command('cd ' + cwd)
  157. if status != 0:
  158. raise OSError, "Can't chdir to %s" % cwd
  159. # wacky hack: sometimes scons will create an output directory then
  160. # fork a job to generate files in that directory, and the job will
  161. # get run before the directory creation propagates through NFS.
  162. # This hack looks for a '-o' option indicating an output file and
  163. # waits for the corresponding directory to appear if necessary.
  164. try:
  165. if 'cc' in cmd[0] or 'g++' in cmd[0]:
  166. output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
  167. elif 'm5' in cmd[0]:
  168. output_dir = cmd[cmd.index('-d')+1]
  169. else:
  170. output_dir = None
  171. except (ValueError, IndexError):
  172. # no big deal if there's no '-o'/'-d' or if it's the final argument
  173. output_dir = None
  174. if output_dir:
  175. secs_waited = 0
  176. while not shell.dir_exists(output_dir) and secs_waited < 90:
  177. time.sleep(5)
  178. secs_waited += 5
  179. if secs_waited > 30:
  180. print "waited", secs_waited, "seconds for", output_dir
  181. # run command
  182. if options.stdout_file:
  183. cmd += ['>', options.stdout_file]
  184. if options.stderr_file:
  185. cmd += ['2>', options.stderr_file]
  186. try:
  187. (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
  188. except pexpect.TIMEOUT:
  189. print >>sys.stderr, "%s: command timed out after %d seconds." \
  190. % (progname, options.cmd_timeout)
  191. shell.sendline('~.') # oarsub/ssh termination escape sequence
  192. shell.safe_close()
  193. status = 3
  194. if output:
  195. print output
  196. finally:
  197. # end job
  198. if shell.isalive():
  199. shell.sendline('exit')
  200. shell.expect('Disconnected from OAR job .*')
  201. shell.safe_close()
  202. # if there was an error, log the output even if not requested
  203. if status != 0 or options.save_log:
  204. log = file('qdo-log.' + str(os.getpid()), 'w')
  205. log.write(shell.full_output)
  206. log.close()
  207. del shell
  208. sys.exit(status)