Demo/threads/find.py - platform/external/python/cpython3 - Gitiles

 # A parallelized "find(1)" using the thread module.

 # This demonstrates the use of a work queue and worker threads.
 # It really does do more stats/sec when using multiple threads,
 # although the improvement is only about 20-30 percent.
 # (That was 8 years ago.  In 2002, on Linux, I can't measure
 # a speedup. :-( )

 # I'm too lazy to write a command line parser for the full find(1)
 # command line syntax, so the predicate it searches for is wired-in,
 # see function selector() below.  (It currently searches for files with
 # world write permission.)

 # Usage: parfind.py [-w nworkers] [directory] ...
 # Default nworkers is 4


 import sys
 import getopt
 import time
 import os
 from stat import *
 import _thread as thread


 # Work queue class.  Usage:
 #   wq = WorkQ()
 #   wq.addwork(func, (arg1, arg2, ...)) # one or more calls
 #   wq.run(nworkers)
 # The work is done when wq.run() completes.
 # The function calls executed by the workers may add more work.
 # Don't use keyboard interrupts!

 class WorkQ:

     # Invariants:

     # - busy and work are only modified when mutex is locked
     # - len(work) is the number of jobs ready to be taken
     # - busy is the number of jobs being done
     # - todo is locked iff there is no work and somebody is busy

     def __init__(self):
         self.mutex = thread.allocate()
         self.todo = thread.allocate()
         self.todo.acquire()
         self.work = []
         self.busy = 0

     def addwork(self, func, args):
         job = (func, args)
         self.mutex.acquire()
         self.work.append(job)
         self.mutex.release()
         if len(self.work) == 1:
             self.todo.release()

     def _getwork(self):
         self.todo.acquire()
         self.mutex.acquire()
         if self.busy == 0 and len(self.work) == 0:
             self.mutex.release()
             self.todo.release()
             return None
         job = self.work[0]
         del self.work[0]
         self.busy = self.busy + 1
         self.mutex.release()
         if len(self.work) > 0:
             self.todo.release()
         return job

     def _donework(self):
         self.mutex.acquire()
         self.busy = self.busy - 1
         if self.busy == 0 and len(self.work) == 0:
             self.todo.release()
         self.mutex.release()

     def _worker(self):
         time.sleep(0.00001)     # Let other threads run
         while 1:
             job = self._getwork()
             if not job:
                 break
             func, args = job
             func(*args)
             self._donework()

     def run(self, nworkers):
         if not self.work:
             return # Nothing to do
         for i in range(nworkers-1):
             thread.start_new(self._worker, ())
         self._worker()
         self.todo.acquire()


 # Main program

 def main():
     nworkers = 4
     opts, args = getopt.getopt(sys.argv[1:], '-w:')
     for opt, arg in opts:
         if opt == '-w':
             nworkers = int(arg)
     if not args:
         args = [os.curdir]

     wq = WorkQ()
     for dir in args:
         wq.addwork(find, (dir, selector, wq))

     t1 = time.time()
     wq.run(nworkers)
     t2 = time.time()

     sys.stderr.write('Total time %r sec.\n' % (t2-t1))


 # The predicate -- defines what files we look for.
 # Feel free to change this to suit your purpose

 def selector(dir, name, fullname, stat):
     # Look for world writable files that are not symlinks
     return (stat[ST_MODE] & 0o002) != 0 and not S_ISLNK(stat[ST_MODE])


 # The find procedure -- calls wq.addwork() for subdirectories

 def find(dir, pred, wq):
     try:
         names = os.listdir(dir)
     except os.error as msg:
         print(repr(dir), ':', msg)
         return
     for name in names:
         if name not in (os.curdir, os.pardir):
             fullname = os.path.join(dir, name)
             try:
                 stat = os.lstat(fullname)
             except os.error as msg:
                 print(repr(fullname), ':', msg)
                 continue
             if pred(dir, name, fullname, stat):
                 print(fullname)
             if S_ISDIR(stat[ST_MODE]):
                 if not os.path.ismount(fullname):
                     wq.addwork(find, (fullname, pred, wq))


 # Call the main program

 main()
	# A parallelized "find(1)" using the thread module.

	# This demonstrates the use of a work queue and worker threads.
	# It really does do more stats/sec when using multiple threads,
	# although the improvement is only about 20-30 percent.
	# (That was 8 years ago. In 2002, on Linux, I can't measure
	# a speedup. :-( )

	# I'm too lazy to write a command line parser for the full find(1)
	# command line syntax, so the predicate it searches for is wired-in,
	# see function selector() below. (It currently searches for files with
	# world write permission.)

	# Usage: parfind.py [-w nworkers] [directory] ...
	# Default nworkers is 4


	import sys
	import getopt
	import time
	import os
	from stat import *
	import _thread as thread


	# Work queue class. Usage:
	# wq = WorkQ()
	# wq.addwork(func, (arg1, arg2, ...)) # one or more calls
	# wq.run(nworkers)
	# The work is done when wq.run() completes.
	# The function calls executed by the workers may add more work.
	# Don't use keyboard interrupts!

	class WorkQ:

	# Invariants:

	# - busy and work are only modified when mutex is locked
	# - len(work) is the number of jobs ready to be taken
	# - busy is the number of jobs being done
	# - todo is locked iff there is no work and somebody is busy

	def __init__(self):
	self.mutex = thread.allocate()
	self.todo = thread.allocate()
	self.todo.acquire()
	self.work = []
	self.busy = 0

	def addwork(self, func, args):
	job = (func, args)
	self.mutex.acquire()
	self.work.append(job)
	self.mutex.release()
	if len(self.work) == 1:
	self.todo.release()

	def _getwork(self):
	self.todo.acquire()
	self.mutex.acquire()
	if self.busy == 0 and len(self.work) == 0:
	self.mutex.release()
	self.todo.release()
	return None
	job = self.work[0]
	del self.work[0]
	self.busy = self.busy + 1
	self.mutex.release()
	if len(self.work) > 0:
	self.todo.release()
	return job

	def _donework(self):
	self.mutex.acquire()
	self.busy = self.busy - 1
	if self.busy == 0 and len(self.work) == 0:
	self.todo.release()
	self.mutex.release()

	def _worker(self):
	time.sleep(0.00001) # Let other threads run
	while 1:
	job = self._getwork()
	if not job:
	break
	func, args = job
	func(*args)
	self._donework()

	def run(self, nworkers):
	if not self.work:
	return # Nothing to do
	for i in range(nworkers-1):
	thread.start_new(self._worker, ())
	self._worker()
	self.todo.acquire()


	# Main program

	def main():
	nworkers = 4
	opts, args = getopt.getopt(sys.argv[1:], '-w:')
	for opt, arg in opts:
	if opt == '-w':
	nworkers = int(arg)
	if not args:
	args = [os.curdir]

	wq = WorkQ()
	for dir in args:
	wq.addwork(find, (dir, selector, wq))

	t1 = time.time()
	wq.run(nworkers)
	t2 = time.time()

	sys.stderr.write('Total time %r sec.\n' % (t2-t1))


	# The predicate -- defines what files we look for.
	# Feel free to change this to suit your purpose

	def selector(dir, name, fullname, stat):
	# Look for world writable files that are not symlinks
	return (stat[ST_MODE] & 0o002) != 0 and not S_ISLNK(stat[ST_MODE])


	# The find procedure -- calls wq.addwork() for subdirectories

	def find(dir, pred, wq):
	try:
	names = os.listdir(dir)
	except os.error as msg:
	print(repr(dir), ':', msg)
	return
	for name in names:
	if name not in (os.curdir, os.pardir):
	fullname = os.path.join(dir, name)
	try:
	stat = os.lstat(fullname)
	except os.error as msg:
	print(repr(fullname), ':', msg)
	continue
	if pred(dir, name, fullname, stat):
	print(fullname)
	if S_ISDIR(stat[ST_MODE]):
	if not os.path.ismount(fullname):
	wq.addwork(find, (fullname, pred, wq))


	# Call the main program

	main()