blob: 57fe81e38083e8ac0c3647e3d6a5c29a3bc6b065 [file] [log] [blame]
Guido van Rossum2108a501996-07-30 19:07:18 +00001# A parallelized "find(1)" using the thread module.
Guido van Rossum1b789f91993-12-17 14:45:06 +00002
3# This demonstrates the use of a work queue and worker threads.
4# It really does do more stats/sec when using multiple threads,
5# although the improvement is only about 20-30 percent.
Guido van Rossumb50e1de2002-10-18 18:20:33 +00006# (That was 8 years ago. In 2002, on Linux, I can't measure
7# a speedup. :-( )
Guido van Rossum1b789f91993-12-17 14:45:06 +00008
9# I'm too lazy to write a command line parser for the full find(1)
10# command line syntax, so the predicate it searches for is wired-in,
11# see function selector() below. (It currently searches for files with
Guido van Rossumb50e1de2002-10-18 18:20:33 +000012# world write permission.)
Guido van Rossum1b789f91993-12-17 14:45:06 +000013
14# Usage: parfind.py [-w nworkers] [directory] ...
Guido van Rossumb50e1de2002-10-18 18:20:33 +000015# Default nworkers is 4
Guido van Rossum1b789f91993-12-17 14:45:06 +000016
17
18import sys
19import getopt
Guido van Rossum1b789f91993-12-17 14:45:06 +000020import time
21import os
22from stat import *
23import thread
24
25
26# Work queue class. Usage:
27# wq = WorkQ()
28# wq.addwork(func, (arg1, arg2, ...)) # one or more calls
29# wq.run(nworkers)
30# The work is done when wq.run() completes.
31# The function calls executed by the workers may add more work.
32# Don't use keyboard interrupts!
33
34class WorkQ:
35
Tim Peters172c40b2001-01-21 07:07:30 +000036 # Invariants:
Guido van Rossum1b789f91993-12-17 14:45:06 +000037
Tim Peters172c40b2001-01-21 07:07:30 +000038 # - busy and work are only modified when mutex is locked
39 # - len(work) is the number of jobs ready to be taken
40 # - busy is the number of jobs being done
41 # - todo is locked iff there is no work and somebody is busy
Guido van Rossum1b789f91993-12-17 14:45:06 +000042
Tim Peters172c40b2001-01-21 07:07:30 +000043 def __init__(self):
44 self.mutex = thread.allocate()
45 self.todo = thread.allocate()
46 self.todo.acquire()
47 self.work = []
48 self.busy = 0
Guido van Rossum1b789f91993-12-17 14:45:06 +000049
Tim Peters172c40b2001-01-21 07:07:30 +000050 def addwork(self, func, args):
51 job = (func, args)
52 self.mutex.acquire()
53 self.work.append(job)
54 self.mutex.release()
55 if len(self.work) == 1:
56 self.todo.release()
Guido van Rossum1b789f91993-12-17 14:45:06 +000057
Tim Peters172c40b2001-01-21 07:07:30 +000058 def _getwork(self):
59 self.todo.acquire()
60 self.mutex.acquire()
61 if self.busy == 0 and len(self.work) == 0:
62 self.mutex.release()
63 self.todo.release()
64 return None
65 job = self.work[0]
66 del self.work[0]
67 self.busy = self.busy + 1
68 self.mutex.release()
69 if len(self.work) > 0:
70 self.todo.release()
71 return job
Guido van Rossum1b789f91993-12-17 14:45:06 +000072
Tim Peters172c40b2001-01-21 07:07:30 +000073 def _donework(self):
74 self.mutex.acquire()
75 self.busy = self.busy - 1
76 if self.busy == 0 and len(self.work) == 0:
77 self.todo.release()
78 self.mutex.release()
Guido van Rossum1b789f91993-12-17 14:45:06 +000079
Tim Peters172c40b2001-01-21 07:07:30 +000080 def _worker(self):
81 time.sleep(0.00001) # Let other threads run
82 while 1:
83 job = self._getwork()
84 if not job:
85 break
86 func, args = job
Neal Norwitzd9108552006-03-17 08:00:19 +000087 func(*args)
Tim Peters172c40b2001-01-21 07:07:30 +000088 self._donework()
Guido van Rossum1b789f91993-12-17 14:45:06 +000089
Tim Peters172c40b2001-01-21 07:07:30 +000090 def run(self, nworkers):
91 if not self.work:
92 return # Nothing to do
93 for i in range(nworkers-1):
94 thread.start_new(self._worker, ())
95 self._worker()
96 self.todo.acquire()
Guido van Rossum1b789f91993-12-17 14:45:06 +000097
98
99# Main program
100
101def main():
Tim Peters172c40b2001-01-21 07:07:30 +0000102 nworkers = 4
103 opts, args = getopt.getopt(sys.argv[1:], '-w:')
104 for opt, arg in opts:
105 if opt == '-w':
Neal Norwitzd9108552006-03-17 08:00:19 +0000106 nworkers = int(arg)
Tim Peters172c40b2001-01-21 07:07:30 +0000107 if not args:
108 args = [os.curdir]
Guido van Rossum1b789f91993-12-17 14:45:06 +0000109
Tim Peters172c40b2001-01-21 07:07:30 +0000110 wq = WorkQ()
111 for dir in args:
112 wq.addwork(find, (dir, selector, wq))
Guido van Rossum1b789f91993-12-17 14:45:06 +0000113
Tim Peters172c40b2001-01-21 07:07:30 +0000114 t1 = time.time()
115 wq.run(nworkers)
116 t2 = time.time()
Guido van Rossum1b789f91993-12-17 14:45:06 +0000117
Walter Dörwald70a6b492004-02-12 17:35:32 +0000118 sys.stderr.write('Total time %r sec.\n' % (t2-t1))
Guido van Rossum1b789f91993-12-17 14:45:06 +0000119
120
121# The predicate -- defines what files we look for.
122# Feel free to change this to suit your purpose
123
124def selector(dir, name, fullname, stat):
Guido van Rossumb50e1de2002-10-18 18:20:33 +0000125 # Look for world writable files that are not symlinks
Collin Winter6f2df4d2007-07-17 20:59:35 +0000126 return (stat[ST_MODE] & 0o002) != 0 and not S_ISLNK(stat[ST_MODE])
Guido van Rossum1b789f91993-12-17 14:45:06 +0000127
128
129# The find procedure -- calls wq.addwork() for subdirectories
130
131def find(dir, pred, wq):
Tim Peters172c40b2001-01-21 07:07:30 +0000132 try:
133 names = os.listdir(dir)
Guido van Rossumb940e112007-01-10 16:19:56 +0000134 except os.error as msg:
Collin Winter6f2df4d2007-07-17 20:59:35 +0000135 print(repr(dir), ':', msg)
Tim Peters172c40b2001-01-21 07:07:30 +0000136 return
137 for name in names:
138 if name not in (os.curdir, os.pardir):
139 fullname = os.path.join(dir, name)
140 try:
141 stat = os.lstat(fullname)
Guido van Rossumb940e112007-01-10 16:19:56 +0000142 except os.error as msg:
Collin Winter6f2df4d2007-07-17 20:59:35 +0000143 print(repr(fullname), ':', msg)
Tim Peters172c40b2001-01-21 07:07:30 +0000144 continue
145 if pred(dir, name, fullname, stat):
Collin Winter6f2df4d2007-07-17 20:59:35 +0000146 print(fullname)
Tim Peters172c40b2001-01-21 07:07:30 +0000147 if S_ISDIR(stat[ST_MODE]):
148 if not os.path.ismount(fullname):
149 wq.addwork(find, (fullname, pred, wq))
Guido van Rossum1b789f91993-12-17 14:45:06 +0000150
151
152# Call the main program
153
154main()