/Demo/threads/find.py

http://unladen-swallow.googlecode.com/ · Python · 155 lines · 94 code · 31 blank · 30 comment · 24 complexity · 92fe2751b5527d17c0f073ea953faf77 MD5 · raw file

  1. # A parallelized "find(1)" using the thread module.
  2. # This demonstrates the use of a work queue and worker threads.
  3. # It really does do more stats/sec when using multiple threads,
  4. # although the improvement is only about 20-30 percent.
  5. # (That was 8 years ago. In 2002, on Linux, I can't measure
  6. # a speedup. :-( )
  7. # I'm too lazy to write a command line parser for the full find(1)
  8. # command line syntax, so the predicate it searches for is wired-in,
  9. # see function selector() below. (It currently searches for files with
  10. # world write permission.)
  11. # Usage: parfind.py [-w nworkers] [directory] ...
  12. # Default nworkers is 4
  13. import sys
  14. import getopt
  15. import string
  16. import time
  17. import os
  18. from stat import *
  19. import thread
  20. # Work queue class. Usage:
  21. # wq = WorkQ()
  22. # wq.addwork(func, (arg1, arg2, ...)) # one or more calls
  23. # wq.run(nworkers)
  24. # The work is done when wq.run() completes.
  25. # The function calls executed by the workers may add more work.
  26. # Don't use keyboard interrupts!
  27. class WorkQ:
  28. # Invariants:
  29. # - busy and work are only modified when mutex is locked
  30. # - len(work) is the number of jobs ready to be taken
  31. # - busy is the number of jobs being done
  32. # - todo is locked iff there is no work and somebody is busy
  33. def __init__(self):
  34. self.mutex = thread.allocate()
  35. self.todo = thread.allocate()
  36. self.todo.acquire()
  37. self.work = []
  38. self.busy = 0
  39. def addwork(self, func, args):
  40. job = (func, args)
  41. self.mutex.acquire()
  42. self.work.append(job)
  43. self.mutex.release()
  44. if len(self.work) == 1:
  45. self.todo.release()
  46. def _getwork(self):
  47. self.todo.acquire()
  48. self.mutex.acquire()
  49. if self.busy == 0 and len(self.work) == 0:
  50. self.mutex.release()
  51. self.todo.release()
  52. return None
  53. job = self.work[0]
  54. del self.work[0]
  55. self.busy = self.busy + 1
  56. self.mutex.release()
  57. if len(self.work) > 0:
  58. self.todo.release()
  59. return job
  60. def _donework(self):
  61. self.mutex.acquire()
  62. self.busy = self.busy - 1
  63. if self.busy == 0 and len(self.work) == 0:
  64. self.todo.release()
  65. self.mutex.release()
  66. def _worker(self):
  67. time.sleep(0.00001) # Let other threads run
  68. while 1:
  69. job = self._getwork()
  70. if not job:
  71. break
  72. func, args = job
  73. apply(func, args)
  74. self._donework()
  75. def run(self, nworkers):
  76. if not self.work:
  77. return # Nothing to do
  78. for i in range(nworkers-1):
  79. thread.start_new(self._worker, ())
  80. self._worker()
  81. self.todo.acquire()
  82. # Main program
  83. def main():
  84. nworkers = 4
  85. opts, args = getopt.getopt(sys.argv[1:], '-w:')
  86. for opt, arg in opts:
  87. if opt == '-w':
  88. nworkers = string.atoi(arg)
  89. if not args:
  90. args = [os.curdir]
  91. wq = WorkQ()
  92. for dir in args:
  93. wq.addwork(find, (dir, selector, wq))
  94. t1 = time.time()
  95. wq.run(nworkers)
  96. t2 = time.time()
  97. sys.stderr.write('Total time %r sec.\n' % (t2-t1))
  98. # The predicate -- defines what files we look for.
  99. # Feel free to change this to suit your purpose
  100. def selector(dir, name, fullname, stat):
  101. # Look for world writable files that are not symlinks
  102. return (stat[ST_MODE] & 0002) != 0 and not S_ISLNK(stat[ST_MODE])
  103. # The find procedure -- calls wq.addwork() for subdirectories
  104. def find(dir, pred, wq):
  105. try:
  106. names = os.listdir(dir)
  107. except os.error, msg:
  108. print repr(dir), ':', msg
  109. return
  110. for name in names:
  111. if name not in (os.curdir, os.pardir):
  112. fullname = os.path.join(dir, name)
  113. try:
  114. stat = os.lstat(fullname)
  115. except os.error, msg:
  116. print repr(fullname), ':', msg
  117. continue
  118. if pred(dir, name, fullname, stat):
  119. print fullname
  120. if S_ISDIR(stat[ST_MODE]):
  121. if not os.path.ismount(fullname):
  122. wq.addwork(find, (fullname, pred, wq))
  123. # Call the main program
  124. main()