PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/util/pbs/send.py

https://bitbucket.org/musleh123/ece565
Python | 292 lines | 255 code | 8 blank | 29 comment | 16 complexity | da4fcba1f678d91b1dd5d12139cf49ec MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, WTFPL
  1. #!/usr/bin/env python
  2. # Copyright (c) 2005 The Regents of The University of Michigan
  3. # All rights reserved.
  4. #
  5. # Redistribution and use in source and binary forms, with or without
  6. # modification, are permitted provided that the following conditions are
  7. # met: redistributions of source code must retain the above copyright
  8. # notice, this list of conditions and the following disclaimer;
  9. # redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution;
  12. # neither the name of the copyright holders nor the names of its
  13. # contributors may be used to endorse or promote products derived from
  14. # this software without specific prior written permission.
  15. #
  16. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. #
  28. # Authors: Ali Saidi
  29. # Nathan Binkert
  30. import os, os.path, re, socket, sys
  31. from os import environ as env, listdir
  32. from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
  33. from filecmp import cmp as filecmp
  34. from shutil import copy
  35. def nfspath(dir):
  36. if dir.startswith('/.automount/'):
  37. dir = '/n/%s' % dir[12:]
  38. elif not dir.startswith('/n/'):
  39. dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
  40. return dir
  41. def syncdir(srcdir, destdir):
  42. srcdir = normpath(srcdir)
  43. destdir = normpath(destdir)
  44. if not isdir(destdir):
  45. sys.exit('destination directory "%s" does not exist' % destdir)
  46. for root, dirs, files in os.walk(srcdir):
  47. root = normpath(root)
  48. prefix = os.path.commonprefix([root, srcdir])
  49. root = root[len(prefix):]
  50. if root.startswith('/'):
  51. root = root[1:]
  52. for rem in [ d for d in dirs if d.startswith('.') or d == 'SCCS']:
  53. dirs.remove(rem)
  54. for entry in dirs:
  55. newdir = joinpath(destdir, root, entry)
  56. if not isdir(newdir):
  57. os.mkdir(newdir)
  58. print 'mkdir', newdir
  59. for i,d in enumerate(dirs):
  60. if islink(joinpath(srcdir, root, d)):
  61. dirs[i] = joinpath(d, '.')
  62. for entry in files:
  63. dest = normpath(joinpath(destdir, root, entry))
  64. src = normpath(joinpath(srcdir, root, entry))
  65. if not isfile(dest) or not filecmp(src, dest):
  66. print 'copy %s %s' % (dest, src)
  67. copy(src, dest)
  68. progpath = nfspath(sys.path[0])
  69. progname = basename(sys.argv[0])
  70. usage = """\
  71. Usage:
  72. %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
  73. -c clean directory if job can be run
  74. -C submit the checkpointing runs
  75. -d Make jobs be dependent on the completion of the checkpoint runs
  76. -e only echo pbs command info, don't actually send the job
  77. -f force the job to run regardless of state
  78. -q <queue> submit job to the named queue
  79. -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
  80. -v be verbose
  81. %(progname)s [-j <jobfile>] -l [-v] <regexp>
  82. -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
  83. -l list job names, don't submit
  84. -v be verbose (list job parameters)
  85. %(progname)s -h
  86. -h display this help
  87. """ % locals()
  88. try:
  89. import getopt
  90. opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
  91. except getopt.GetoptError:
  92. sys.exit(usage)
  93. depend = False
  94. clean = False
  95. onlyecho = False
  96. exprs = []
  97. force = False
  98. listonly = False
  99. queue = ''
  100. verbose = False
  101. jfile = 'Test.py'
  102. docpts = False
  103. doruns = True
  104. runflag = False
  105. node_type = 'FAST'
  106. update = True
  107. for opt,arg in opts:
  108. if opt == '-C':
  109. docpts = True
  110. if opt == '-c':
  111. clean = True
  112. if opt == '-d':
  113. depend = True
  114. if opt == '-e':
  115. onlyecho = True
  116. if opt == '-f':
  117. force = True
  118. if opt == '-h':
  119. print usage
  120. sys.exit(0)
  121. if opt == '-j':
  122. jfile = arg
  123. if opt == '-l':
  124. listonly = True
  125. if opt == '-n':
  126. update = False
  127. if opt == '-q':
  128. queue = arg
  129. if opt == '-R':
  130. runflag = True
  131. if opt == '-t':
  132. node_type = arg
  133. if opt == '-v':
  134. verbose = True
  135. if docpts:
  136. doruns = runflag
  137. for arg in args:
  138. exprs.append(re.compile(arg))
  139. import jobfile, pbs
  140. from job import JobDir, date
  141. conf = jobfile.JobFile(jfile)
  142. if update and not listonly and not onlyecho and isdir(conf.linkdir):
  143. if verbose:
  144. print 'Checking for outdated files in Link directory'
  145. if not isdir(conf.basedir):
  146. os.mkdir(conf.basedir)
  147. syncdir(conf.linkdir, conf.basedir)
  148. jobnames = {}
  149. joblist = []
  150. if docpts and doruns:
  151. gen = conf.alljobs()
  152. elif docpts:
  153. gen = conf.checkpoints()
  154. elif doruns:
  155. gen = conf.jobs()
  156. for job in gen:
  157. if job.name in jobnames:
  158. continue
  159. if exprs:
  160. for expr in exprs:
  161. if expr.match(job.name):
  162. joblist.append(job)
  163. break
  164. else:
  165. joblist.append(job)
  166. if listonly:
  167. if verbose:
  168. for job in joblist:
  169. job.printinfo()
  170. else:
  171. for job in joblist:
  172. print job.name
  173. sys.exit(0)
  174. if not onlyecho:
  175. newlist = []
  176. for job in joblist:
  177. jobdir = JobDir(joinpath(conf.rootdir, job.name))
  178. if jobdir.exists():
  179. if not force:
  180. status = jobdir.getstatus()
  181. if status == 'queued':
  182. continue
  183. if status == 'running':
  184. continue
  185. if status == 'success':
  186. continue
  187. if not clean:
  188. sys.exit('job directory %s not clean!' % jobdir)
  189. jobdir.clean()
  190. newlist.append(job)
  191. joblist = newlist
  192. class NameHack(object):
  193. def __init__(self, host='pbs.pool', port=24465):
  194. self.host = host
  195. self.port = port
  196. self.socket = None
  197. def setname(self, jobid, jobname):
  198. try:
  199. jobid = int(jobid)
  200. except ValueError:
  201. jobid = int(jobid.strip().split('.')[0])
  202. jobname = jobname.strip()
  203. # since pbs can handle jobnames of 15 characters or less,
  204. # don't use the raj hack.
  205. if len(jobname) <= 15:
  206. return
  207. if self.socket is None:
  208. import socket
  209. self.socket = socket.socket()
  210. # Connect to pbs.pool and send the jobid/jobname pair to port
  211. # 24465 (Raj didn't realize that there are only 64k ports and
  212. # setup inetd to point to port 90001)
  213. self.socket.connect((self.host, self.port))
  214. self.socket.send("%s %s\n" % (jobid, jobname))
  215. namehack = NameHack()
  216. for job in joblist:
  217. jobdir = JobDir(joinpath(conf.rootdir, job.name))
  218. if depend:
  219. cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
  220. cptjob = cptdir.readval('.pbs_jobid')
  221. if not onlyecho:
  222. jobdir.create()
  223. print 'Job name: %s' % job.name
  224. print 'Job directory: %s' % jobdir
  225. qsub = pbs.qsub()
  226. qsub.pbshost = 'simpool.eecs.umich.edu'
  227. qsub.stdout = jobdir.file('jobout')
  228. qsub.name = job.name[:15]
  229. qsub.join = True
  230. qsub.node_type = node_type
  231. qsub.env['ROOTDIR'] = conf.rootdir
  232. qsub.env['JOBNAME'] = job.name
  233. if depend:
  234. qsub.afterok = cptjob
  235. if queue:
  236. qsub.queue = queue
  237. qsub.build(joinpath(progpath, 'job.py'))
  238. if verbose:
  239. print 'PBS Command: %s' % qsub.command
  240. if not onlyecho:
  241. ec = qsub.do()
  242. if ec == 0:
  243. jobid = qsub.result
  244. print 'PBS Jobid: %s' % jobid
  245. namehack.setname(jobid, job.name)
  246. queued = date()
  247. jobdir.echofile('.pbs_jobid', jobid)
  248. jobdir.echofile('.pbs_jobname', job.name)
  249. jobdir.echofile('.queued', queued)
  250. jobdir.setstatus('queued on %s' % queued)
  251. else:
  252. print 'PBS Failed'