/Doc/includes/mp_distributing.py

http://unladen-swallow.googlecode.com/ · Python · 364 lines · 259 code · 49 blank · 56 comment · 29 complexity · 7099e72e043978987a928ddefb772ec6 MD5 · raw file

  1. #
  2. # Module to allow spawning of processes on foreign host
  3. #
  4. # Depends on `multiprocessing` package -- tested with `processing-0.60`
  5. #
  6. # Copyright (c) 2006-2008, R Oudkerk
  7. # All rights reserved.
  8. #
# Public API: names exported by `from <module> import *`.
__all__ = ['Cluster', 'Host', 'get_logger', 'current_process']
  10. #
  11. # Imports
  12. #
  13. import sys
  14. import os
  15. import tarfile
  16. import shutil
  17. import subprocess
  18. import logging
  19. import itertools
  20. import Queue
  21. try:
  22. import cPickle as pickle
  23. except ImportError:
  24. import pickle
  25. from multiprocessing import Process, current_process, cpu_count
  26. from multiprocessing import util, managers, connection, forking, pool
  27. #
  28. # Logging
  29. #
  30. def get_logger():
  31. return _logger
  32. _logger = logging.getLogger('distributing')
  33. _logger.propagate = 0
  34. _formatter = logging.Formatter(util.DEFAULT_LOGGING_FORMAT)
  35. _handler = logging.StreamHandler()
  36. _handler.setFormatter(_formatter)
  37. _logger.addHandler(_handler)
  38. info = _logger.info
  39. debug = _logger.debug
  40. #
  41. # Get number of cpus
  42. #
  43. try:
  44. slot_count = cpu_count()
  45. except NotImplemented:
  46. slot_count = 1
  47. #
  48. # Manager type which spawns subprocesses
  49. #
class HostManager(managers.SyncManager):
    '''
    Manager type used for spawning processes on a (presumably) foreign host
    '''
    def __init__(self, address, authkey):
        managers.SyncManager.__init__(self, address, authkey)
        # Replaced with 'Host-<ip>:<port>' once the address is known
        # (see from_address()).
        self._name = 'Host-unknown'

    def Process(self, group=None, target=None, name=None, args=(), kwargs={}):
        '''
        Return a proxied RemoteProcess that will run `target` on this host.
        '''
        # Record the basename of the parent's main script so the remote
        # bootstrap (RemoteProcess._bootstrap -> forking.prepare) can
        # recreate __main__ before unpickling `target`.
        if hasattr(sys.modules['__main__'], '__file__'):
            main_path = os.path.basename(sys.modules['__main__'].__file__)
        else:
            main_path = None
        data = pickle.dumps((target, args, kwargs))
        p = self._RemoteProcess(data, main_path)
        if name is None:
            # Default name: '<ip>:<port>/Process-<identity parts>'.
            temp = self._name.split('Host-')[-1] + '/Process-%s'
            name = temp % ':'.join(map(str, p.get_identity()))
        p.set_name(name)
        return p

    @classmethod
    def from_address(cls, address, authkey):
        '''
        Return a HostManager attached to an already-running manager server
        at `address` (the server is started remotely by main()).
        '''
        manager = cls(address, authkey)
        # A throwaway 'dummy' transaction verifies the server is reachable
        # and the authkey is valid before we mark ourselves STARTED.
        managers.transact(address, authkey, 'dummy')
        manager._state.value = managers.State.STARTED
        manager._name = 'Host-%s:%s' % manager.address
        # Finalizer asks the remote server to shut itself down; it also
        # doubles as the public shutdown() method of this instance.
        manager.shutdown = util.Finalize(
            manager, HostManager._finalize_host,
            args=(manager._address, manager._authkey, manager._name),
            exitpriority=-10
            )
        return manager

    @staticmethod
    def _finalize_host(address, authkey, name):
        # Tell the remote manager server to shut down.
        managers.transact(address, authkey, 'shutdown')

    def __repr__(self):
        return '<Host(%s)>' % self._name
  86. #
  87. # Process subclass representing a process on (possibly) a remote machine
  88. #
class RemoteProcess(Process):
    '''
    Represents a process started on a remote host
    '''
    def __init__(self, data, main_path):
        # `data` is a pickle of (target, args, kwargs); `main_path` is the
        # basename of the parent's main script, or None.  Only a basename
        # is accepted because the remote side resolves it in its own cwd.
        assert not main_path or os.path.basename(main_path) == main_path
        Process.__init__(self)
        self._data = data
        self._main_path = main_path

    def _bootstrap(self):
        # Recreate the parent's __main__ module *before* unpickling, since
        # the pickled target/args may reference names defined there.
        forking.prepare({'main_path': self._main_path})
        self._target, self._args, self._kwargs = pickle.loads(self._data)
        return Process._bootstrap(self)

    def get_identity(self):
        return self._identity

# Make RemoteProcess creatable through HostManager proxies
# (used by HostManager.Process()).
HostManager.register('_RemoteProcess', RemoteProcess)
  105. #
  106. # A Pool class that uses a cluster
  107. #
class DistributedPool(pool.Pool):
    '''
    A `pool.Pool` subclass whose worker processes are spread over the
    slots of a `Cluster` rather than forked locally.
    '''
    def __init__(self, cluster, processes=None, initializer=None, initargs=()):
        self._cluster = cluster
        # Workers are created through the cluster, which allocates them to
        # (possibly remote) hosts in round-robin slot order.
        self.Process = cluster.Process
        # Default pool size is one worker per cluster slot.
        pool.Pool.__init__(self, processes or len(cluster),
                           initializer, initargs)

    def _setup_queues(self):
        # Use manager-proxied SettableQueues instead of the default local
        # queues so remote workers can reach them over the network.
        self._inqueue = self._cluster._SettableQueue()
        self._outqueue = self._cluster._SettableQueue()
        self._quick_put = self._inqueue.put
        self._quick_get = self._outqueue.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # Replace any pending tasks with one sentinel per worker so that
        # blocked workers wake up and can terminate.
        inqueue.set_contents([None] * size)
  122. #
  123. # Manager type which starts host managers on other machines
  124. #
  125. def LocalProcess(**kwds):
  126. p = Process(**kwds)
  127. p.set_name('localhost/' + p.name)
  128. return p
class Cluster(managers.SyncManager):
    '''
    Represents collection of slots running on various hosts.

    `Cluster` is a subclass of `SyncManager` so it allows creation of
    various types of shared objects.
    '''
    def __init__(self, hostlist, modules):
        # Bind the local manager to an ephemeral port; remote hosts report
        # back on a separate listener created in start().
        managers.SyncManager.__init__(self, address=('localhost', 0))
        self._hostlist = hostlist
        self._modules = modules
        # This module itself must be shipped so the remote bootstrap can do
        # `from distributing import main`.
        if __name__ not in modules:
            modules.append(__name__)
        files = [sys.modules[name].__file__ for name in modules]
        # Ship source files, not compiled .pyc/.pyo files.
        for i, file in enumerate(files):
            if file.endswith('.pyc') or file.endswith('.pyo'):
                files[i] = file[:-4] + '.py'
        self._files = [os.path.abspath(file) for file in files]

    def start(self):
        '''
        Start the local manager, launch one host manager per host and
        collect each remote manager's address and cpu count.
        '''
        managers.SyncManager.start(self)
        l = connection.Listener(family='AF_INET', authkey=self._authkey)
        for i, host in enumerate(self._hostlist):
            host._start_manager(i, self._authkey, l.address, self._files)
        for host in self._hostlist:
            if host.hostname != 'localhost':
                conn = l.accept()
                # Hosts may connect back in any order; `i` identifies the
                # sender, so look it up rather than assuming it is `host`.
                i, address, cpus = conn.recv()
                conn.close()
                other_host = self._hostlist[i]
                other_host.manager = HostManager.from_address(address,
                                                              self._authkey)
                # An explicitly configured slot count wins over the host's
                # reported cpu count.
                other_host.slots = other_host.slots or cpus
                other_host.Process = other_host.manager.Process
            else:
                host.slots = host.slots or slot_count
                host.Process = LocalProcess
        # One Slot per cpu on every host; processes are handed out
        # round-robin over this list.
        self._slotlist = [
            Slot(host) for host in self._hostlist for i in range(host.slots)
        ]
        self._slot_iterator = itertools.cycle(self._slotlist)
        # SyncManager.start() installed a `shutdown` finalizer as an
        # *instance* attribute; stash it and delete the attribute so the
        # class-level shutdown() method below becomes visible again.
        self._base_shutdown = self.shutdown
        del self.shutdown

    def shutdown(self):
        # Shut down remote host managers first, then the local manager.
        for host in self._hostlist:
            if host.hostname != 'localhost':
                host.manager.shutdown()
        self._base_shutdown()

    def Process(self, group=None, target=None, name=None, args=(), kwargs={}):
        '''Create a process on the next slot in round-robin order.'''
        slot = self._slot_iterator.next()
        return slot.Process(
            group=group, target=target, name=name, args=args, kwargs=kwargs
        )

    def Pool(self, processes=None, initializer=None, initargs=()):
        '''Return a DistributedPool backed by this cluster.'''
        return DistributedPool(self, processes, initializer, initargs)

    def __getitem__(self, i):
        return self._slotlist[i]

    def __len__(self):
        return len(self._slotlist)

    def __iter__(self):
        return iter(self._slotlist)
  188. #
  189. # Queue subclass used by distributed pool
  190. #
  191. class SettableQueue(Queue.Queue):
  192. def empty(self):
  193. return not self.queue
  194. def full(self):
  195. return self.maxsize > 0 and len(self.queue) == self.maxsize
  196. def set_contents(self, contents):
  197. # length of contents must be at least as large as the number of
  198. # threads which have potentially called get()
  199. self.not_empty.acquire()
  200. try:
  201. self.queue.clear()
  202. self.queue.extend(contents)
  203. self.not_empty.notifyAll()
  204. finally:
  205. self.not_empty.release()
# Expose SettableQueue through Cluster managers as '_SettableQueue'
# (consumed by DistributedPool._setup_queues).
Cluster.register('_SettableQueue', SettableQueue)
  207. #
  208. # Class representing a notional cpu in the cluster
  209. #
  210. class Slot(object):
  211. def __init__(self, host):
  212. self.host = host
  213. self.Process = host.Process
  214. #
  215. # Host
  216. #
class Host(object):
    '''
    Represents a host to use as a node in a cluster.

    `hostname` gives the name of the host.  If hostname is not
    "localhost" then ssh is used to log in to the host.  To log in as
    a different user use a host name of the form
    "username@somewhere.org"

    `slots` is used to specify the number of slots for processes on
    the host.  This affects how often processes will be allocated to
    this host.  Normally this should be equal to the number of cpus on
    that host.
    '''
    def __init__(self, hostname, slots=None):
        self.hostname = hostname
        # None means "use the cpu count the host reports"; filled in by
        # Cluster.start().
        self.slots = slots

    def _start_manager(self, index, authkey, address, files):
        '''
        Copy `files` to a temp directory on the host and launch a host
        manager there over ssh; bootstrap data is piped to its stdin.
        Does nothing for 'localhost' (local hosts need no manager).
        '''
        if self.hostname != 'localhost':
            tempdir = copy_to_remote_temporary_directory(self.hostname, files)
            debug('startup files copied to %s:%s', self.hostname, tempdir)
            # Run distributing.main() on the remote host from inside the
            # temp directory that now holds the shipped module files.
            p = subprocess.Popen(
                ['ssh', self.hostname, 'python', '-c',
                 '"import os; os.chdir(%r); '
                 'from distributing import main; main()"' % tempdir],
                stdin=subprocess.PIPE
                )
            data = dict(
                # NOTE(review): 'BoostrappingHost' looks like a typo for
                # 'BootstrappingHost'; kept as-is because it is a runtime
                # value consumed by forking.prepare() on the remote side.
                name='BoostrappingHost', index=index,
                dist_log_level=_logger.getEffectiveLevel(),
                dir=tempdir, authkey=str(authkey), parent_address=address
                )
            # The remote main() reads this pickle from its stdin.
            pickle.dump(data, p.stdin, pickle.HIGHEST_PROTOCOL)
            p.stdin.close()
  249. #
  250. # Copy files to remote directory, returning name of directory
  251. #
# Python (2.x) snippet executed remotely via `ssh ... python -c`: makes a
# temp directory, untars a gzipped archive from stdin into it, and prints
# the directory's path (read back by copy_to_remote_temporary_directory).
unzip_code = '''"
import tempfile, os, sys, tarfile
tempdir = tempfile.mkdtemp(prefix='distrib-')
os.chdir(tempdir)
tf = tarfile.open(fileobj=sys.stdin, mode='r|gz')
for ti in tf:
    tf.extract(ti)
print tempdir
"'''
def copy_to_remote_temporary_directory(host, files):
    '''
    Copy `files` into a freshly created temporary directory on `host`
    (over ssh) and return the remote directory's path as a string.
    '''
    p = subprocess.Popen(
        ['ssh', host, 'python', '-c', unzip_code],
        stdout=subprocess.PIPE, stdin=subprocess.PIPE
        )
    # Stream a gzipped tar of the files straight into the remote python's
    # stdin; each file is archived under its basename only.
    tf = tarfile.open(fileobj=p.stdin, mode='w|gz')
    for name in files:
        tf.add(name, os.path.basename(name))
    tf.close()
    # Closing stdin signals end-of-archive to the remote tarfile reader.
    p.stdin.close()
    # The remote snippet prints the temp directory it created.
    return p.stdout.read().rstrip()
  272. #
  273. # Code which runs a host manager
  274. #
def main():
    '''
    Entry point of a remote host manager, launched via
    `ssh ... python -c ...` by Host._start_manager().

    Reads pickled bootstrap data from stdin, starts a manager server,
    reports the server address and cpu count back to the parent cluster,
    then serves requests forever.
    '''
    # get data from parent over stdin
    data = pickle.load(sys.stdin)
    sys.stdin.close()

    # set some stuff
    _logger.setLevel(data['dist_log_level'])
    forking.prepare(data)

    # create server for a `HostManager` object
    server = managers.Server(HostManager._registry, ('', 0), data['authkey'])
    current_process()._server = server

    # report server address and number of cpus back to parent
    conn = connection.Client(data['parent_address'], authkey=data['authkey'])
    conn.send((data['index'], server.address, slot_count))
    conn.close()

    # set name etc
    current_process().set_name('Host-%s:%s' % server.address)
    util._run_after_forkers()

    # register a cleanup function: remove the temp directory the startup
    # files were copied into when this process exits
    def cleanup(directory):
        debug('removing directory %s', directory)
        shutil.rmtree(directory)
        debug('shutting down host manager')
    util.Finalize(None, cleanup, args=[data['dir']], exitpriority=0)

    # start host manager
    debug('remote host manager starting in %s', data['dir'])
    server.serve_forever()