Guido van Rossum | 2108a50 | 1996-07-30 19:07:18 +0000 | [diff] [blame] | 1 | # A parallelized "find(1)" using the thread module. |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 2 | |
| 3 | # This demonstrates the use of a work queue and worker threads. |
| 4 | # It really does do more stats/sec when using multiple threads, |
| 5 | # although the improvement is only about 20-30 percent. |
Guido van Rossum | b50e1de | 2002-10-18 18:20:33 +0000 | [diff] [blame] | 6 | # (That was 8 years ago. In 2002, on Linux, I can't measure |
| 7 | # a speedup. :-( ) |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 8 | |
| 9 | # I'm too lazy to write a command line parser for the full find(1) |
| 10 | # command line syntax, so the predicate it searches for is wired-in, |
| 11 | # see function selector() below. (It currently searches for files with |
Guido van Rossum | b50e1de | 2002-10-18 18:20:33 +0000 | [diff] [blame] | 12 | # world write permission.) |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 13 | |
| 14 | # Usage: parfind.py [-w nworkers] [directory] ... |
Guido van Rossum | b50e1de | 2002-10-18 18:20:33 +0000 | [diff] [blame] | 15 | # Default nworkers is 4 |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 16 | |
| 17 | |
| 18 | import sys |
| 19 | import getopt |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 20 | import time |
| 21 | import os |
| 22 | from stat import * |
| 23 | import thread |
| 24 | |
| 25 | |
| 26 | # Work queue class. Usage: |
| 27 | # wq = WorkQ() |
| 28 | # wq.addwork(func, (arg1, arg2, ...)) # one or more calls |
| 29 | # wq.run(nworkers) |
| 30 | # The work is done when wq.run() completes. |
| 31 | # The function calls executed by the workers may add more work. |
| 32 | # Don't use keyboard interrupts! |
| 33 | |
| 34 | class WorkQ: |
| 35 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 36 | # Invariants: |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 37 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 38 | # - busy and work are only modified when mutex is locked |
| 39 | # - len(work) is the number of jobs ready to be taken |
| 40 | # - busy is the number of jobs being done |
| 41 | # - todo is locked iff there is no work and somebody is busy |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 42 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 43 | def __init__(self): |
| 44 | self.mutex = thread.allocate() |
| 45 | self.todo = thread.allocate() |
| 46 | self.todo.acquire() |
| 47 | self.work = [] |
| 48 | self.busy = 0 |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 49 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 50 | def addwork(self, func, args): |
| 51 | job = (func, args) |
| 52 | self.mutex.acquire() |
| 53 | self.work.append(job) |
| 54 | self.mutex.release() |
| 55 | if len(self.work) == 1: |
| 56 | self.todo.release() |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 57 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 58 | def _getwork(self): |
| 59 | self.todo.acquire() |
| 60 | self.mutex.acquire() |
| 61 | if self.busy == 0 and len(self.work) == 0: |
| 62 | self.mutex.release() |
| 63 | self.todo.release() |
| 64 | return None |
| 65 | job = self.work[0] |
| 66 | del self.work[0] |
| 67 | self.busy = self.busy + 1 |
| 68 | self.mutex.release() |
| 69 | if len(self.work) > 0: |
| 70 | self.todo.release() |
| 71 | return job |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 72 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 73 | def _donework(self): |
| 74 | self.mutex.acquire() |
| 75 | self.busy = self.busy - 1 |
| 76 | if self.busy == 0 and len(self.work) == 0: |
| 77 | self.todo.release() |
| 78 | self.mutex.release() |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 79 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 80 | def _worker(self): |
| 81 | time.sleep(0.00001) # Let other threads run |
| 82 | while 1: |
| 83 | job = self._getwork() |
| 84 | if not job: |
| 85 | break |
| 86 | func, args = job |
Neal Norwitz | d910855 | 2006-03-17 08:00:19 +0000 | [diff] [blame] | 87 | func(*args) |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 88 | self._donework() |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 89 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 90 | def run(self, nworkers): |
| 91 | if not self.work: |
| 92 | return # Nothing to do |
| 93 | for i in range(nworkers-1): |
| 94 | thread.start_new(self._worker, ()) |
| 95 | self._worker() |
| 96 | self.todo.acquire() |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 97 | |
| 98 | |
| 99 | # Main program |
| 100 | |
| 101 | def main(): |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 102 | nworkers = 4 |
| 103 | opts, args = getopt.getopt(sys.argv[1:], '-w:') |
| 104 | for opt, arg in opts: |
| 105 | if opt == '-w': |
Neal Norwitz | d910855 | 2006-03-17 08:00:19 +0000 | [diff] [blame] | 106 | nworkers = int(arg) |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 107 | if not args: |
| 108 | args = [os.curdir] |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 109 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 110 | wq = WorkQ() |
| 111 | for dir in args: |
| 112 | wq.addwork(find, (dir, selector, wq)) |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 113 | |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 114 | t1 = time.time() |
| 115 | wq.run(nworkers) |
| 116 | t2 = time.time() |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 117 | |
Walter Dörwald | 70a6b49 | 2004-02-12 17:35:32 +0000 | [diff] [blame] | 118 | sys.stderr.write('Total time %r sec.\n' % (t2-t1)) |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 119 | |
| 120 | |
| 121 | # The predicate -- defines what files we look for. |
| 122 | # Feel free to change this to suit your purpose |
| 123 | |
| 124 | def selector(dir, name, fullname, stat): |
Guido van Rossum | b50e1de | 2002-10-18 18:20:33 +0000 | [diff] [blame] | 125 | # Look for world writable files that are not symlinks |
Collin Winter | 6f2df4d | 2007-07-17 20:59:35 +0000 | [diff] [blame] | 126 | return (stat[ST_MODE] & 0o002) != 0 and not S_ISLNK(stat[ST_MODE]) |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 127 | |
| 128 | |
| 129 | # The find procedure -- calls wq.addwork() for subdirectories |
| 130 | |
| 131 | def find(dir, pred, wq): |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 132 | try: |
| 133 | names = os.listdir(dir) |
Guido van Rossum | b940e11 | 2007-01-10 16:19:56 +0000 | [diff] [blame] | 134 | except os.error as msg: |
Collin Winter | 6f2df4d | 2007-07-17 20:59:35 +0000 | [diff] [blame] | 135 | print(repr(dir), ':', msg) |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 136 | return |
| 137 | for name in names: |
| 138 | if name not in (os.curdir, os.pardir): |
| 139 | fullname = os.path.join(dir, name) |
| 140 | try: |
| 141 | stat = os.lstat(fullname) |
Guido van Rossum | b940e11 | 2007-01-10 16:19:56 +0000 | [diff] [blame] | 142 | except os.error as msg: |
Collin Winter | 6f2df4d | 2007-07-17 20:59:35 +0000 | [diff] [blame] | 143 | print(repr(fullname), ':', msg) |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 144 | continue |
| 145 | if pred(dir, name, fullname, stat): |
Collin Winter | 6f2df4d | 2007-07-17 20:59:35 +0000 | [diff] [blame] | 146 | print(fullname) |
Tim Peters | 172c40b | 2001-01-21 07:07:30 +0000 | [diff] [blame] | 147 | if S_ISDIR(stat[ST_MODE]): |
| 148 | if not os.path.ismount(fullname): |
| 149 | wq.addwork(find, (fullname, pred, wq)) |
Guido van Rossum | 1b789f9 | 1993-12-17 14:45:06 +0000 | [diff] [blame] | 150 | |
| 151 | |
| 152 | # Call the main program |
| 153 | |
| 154 | main() |