blob: 7d5edc1c50b19a9a6361d0c641f9aa9457df544b [file] [log] [blame]
Guido van Rossum2108a501996-07-30 19:07:18 +00001# A parallelized "find(1)" using the thread module.
Guido van Rossum1b789f91993-12-17 14:45:06 +00002
3# This demonstrates the use of a work queue and worker threads.
4# It really does do more stats/sec when using multiple threads,
5# although the improvement is only about 20-30 percent.
Guido van Rossumb50e1de2002-10-18 18:20:33 +00006# (That was 8 years ago. In 2002, on Linux, I can't measure
7# a speedup. :-( )
Guido van Rossum1b789f91993-12-17 14:45:06 +00008
9# I'm too lazy to write a command line parser for the full find(1)
10# command line syntax, so the predicate it searches for is wired-in,
11# see function selector() below. (It currently searches for files with
Guido van Rossumb50e1de2002-10-18 18:20:33 +000012# world write permission.)
Guido van Rossum1b789f91993-12-17 14:45:06 +000013
14# Usage: parfind.py [-w nworkers] [directory] ...
Guido van Rossumb50e1de2002-10-18 18:20:33 +000015# Default nworkers is 4
Guido van Rossum1b789f91993-12-17 14:45:06 +000016
17
18import sys
19import getopt
20import string
21import time
22import os
23from stat import *
24import thread
25
26
27# Work queue class. Usage:
28# wq = WorkQ()
29# wq.addwork(func, (arg1, arg2, ...)) # one or more calls
30# wq.run(nworkers)
31# The work is done when wq.run() completes.
32# The function calls executed by the workers may add more work.
33# Don't use keyboard interrupts!
34
35class WorkQ:
36
Tim Peters172c40b2001-01-21 07:07:30 +000037 # Invariants:
Guido van Rossum1b789f91993-12-17 14:45:06 +000038
Tim Peters172c40b2001-01-21 07:07:30 +000039 # - busy and work are only modified when mutex is locked
40 # - len(work) is the number of jobs ready to be taken
41 # - busy is the number of jobs being done
42 # - todo is locked iff there is no work and somebody is busy
Guido van Rossum1b789f91993-12-17 14:45:06 +000043
Tim Peters172c40b2001-01-21 07:07:30 +000044 def __init__(self):
45 self.mutex = thread.allocate()
46 self.todo = thread.allocate()
47 self.todo.acquire()
48 self.work = []
49 self.busy = 0
Guido van Rossum1b789f91993-12-17 14:45:06 +000050
Tim Peters172c40b2001-01-21 07:07:30 +000051 def addwork(self, func, args):
52 job = (func, args)
53 self.mutex.acquire()
54 self.work.append(job)
55 self.mutex.release()
56 if len(self.work) == 1:
57 self.todo.release()
Guido van Rossum1b789f91993-12-17 14:45:06 +000058
Tim Peters172c40b2001-01-21 07:07:30 +000059 def _getwork(self):
60 self.todo.acquire()
61 self.mutex.acquire()
62 if self.busy == 0 and len(self.work) == 0:
63 self.mutex.release()
64 self.todo.release()
65 return None
66 job = self.work[0]
67 del self.work[0]
68 self.busy = self.busy + 1
69 self.mutex.release()
70 if len(self.work) > 0:
71 self.todo.release()
72 return job
Guido van Rossum1b789f91993-12-17 14:45:06 +000073
Tim Peters172c40b2001-01-21 07:07:30 +000074 def _donework(self):
75 self.mutex.acquire()
76 self.busy = self.busy - 1
77 if self.busy == 0 and len(self.work) == 0:
78 self.todo.release()
79 self.mutex.release()
Guido van Rossum1b789f91993-12-17 14:45:06 +000080
Tim Peters172c40b2001-01-21 07:07:30 +000081 def _worker(self):
82 time.sleep(0.00001) # Let other threads run
83 while 1:
84 job = self._getwork()
85 if not job:
86 break
87 func, args = job
88 apply(func, args)
89 self._donework()
Guido van Rossum1b789f91993-12-17 14:45:06 +000090
Tim Peters172c40b2001-01-21 07:07:30 +000091 def run(self, nworkers):
92 if not self.work:
93 return # Nothing to do
94 for i in range(nworkers-1):
95 thread.start_new(self._worker, ())
96 self._worker()
97 self.todo.acquire()
Guido van Rossum1b789f91993-12-17 14:45:06 +000098
99
100# Main program
101
102def main():
Tim Peters172c40b2001-01-21 07:07:30 +0000103 nworkers = 4
104 opts, args = getopt.getopt(sys.argv[1:], '-w:')
105 for opt, arg in opts:
106 if opt == '-w':
107 nworkers = string.atoi(arg)
108 if not args:
109 args = [os.curdir]
Guido van Rossum1b789f91993-12-17 14:45:06 +0000110
Tim Peters172c40b2001-01-21 07:07:30 +0000111 wq = WorkQ()
112 for dir in args:
113 wq.addwork(find, (dir, selector, wq))
Guido van Rossum1b789f91993-12-17 14:45:06 +0000114
Tim Peters172c40b2001-01-21 07:07:30 +0000115 t1 = time.time()
116 wq.run(nworkers)
117 t2 = time.time()
Guido van Rossum1b789f91993-12-17 14:45:06 +0000118
Walter Dörwald70a6b492004-02-12 17:35:32 +0000119 sys.stderr.write('Total time %r sec.\n' % (t2-t1))
Guido van Rossum1b789f91993-12-17 14:45:06 +0000120
121
122# The predicate -- defines what files we look for.
123# Feel free to change this to suit your purpose
124
125def selector(dir, name, fullname, stat):
Guido van Rossumb50e1de2002-10-18 18:20:33 +0000126 # Look for world writable files that are not symlinks
127 return (stat[ST_MODE] & 0002) != 0 and not S_ISLNK(stat[ST_MODE])
Guido van Rossum1b789f91993-12-17 14:45:06 +0000128
129
130# The find procedure -- calls wq.addwork() for subdirectories
131
132def find(dir, pred, wq):
Tim Peters172c40b2001-01-21 07:07:30 +0000133 try:
134 names = os.listdir(dir)
135 except os.error, msg:
Walter Dörwald70a6b492004-02-12 17:35:32 +0000136 print repr(dir), ':', msg
Tim Peters172c40b2001-01-21 07:07:30 +0000137 return
138 for name in names:
139 if name not in (os.curdir, os.pardir):
140 fullname = os.path.join(dir, name)
141 try:
142 stat = os.lstat(fullname)
143 except os.error, msg:
Walter Dörwald70a6b492004-02-12 17:35:32 +0000144 print repr(fullname), ':', msg
Tim Peters172c40b2001-01-21 07:07:30 +0000145 continue
146 if pred(dir, name, fullname, stat):
147 print fullname
148 if S_ISDIR(stat[ST_MODE]):
149 if not os.path.ismount(fullname):
150 wq.addwork(find, (fullname, pred, wq))
Guido van Rossum1b789f91993-12-17 14:45:06 +0000151
152
153# Call the main program
154
155main()