blob: 7154d3c090a7a0f8d183e8454a531d58ea6860ff [file] [log] [blame]
Benjamin Petersone711caf2008-06-11 16:44:04 +00001#
2# Module providing the `Pool` class for managing a process pool
3#
4# multiprocessing/pool.py
5#
6# Copyright (c) 2007-2008, R Oudkerk --- see COPYING.txt
7#
8
# Public names exported by a star-import of this module.
__all__ = ['Pool']
10
11#
12# Imports
13#
14
15import threading
16import queue
17import itertools
18import collections
19import time
20
21from multiprocessing import Process, cpu_count, TimeoutError
22from multiprocessing.util import Finalize, debug
23
24#
25# Constants representing the state of a pool
26#
27
# Stored in `Pool._state` and in the `_state` attribute attached to the
# handler threads.
RUN = 0         # initial state; the submit methods assert the pool is in it
CLOSE = 1       # set by `Pool.close()`
TERMINATE = 2   # set by `Pool.terminate()` and `Pool._terminate_pool()`
31
32#
33# Miscellaneous
34#
35
# Module-wide counter; result objects draw their `_job` id from it with
# `next(job_counter)` and use that id as their key in the pool's cache.
job_counter = itertools.count()
37
def mapstar(args):
    """Unpack `args` into `map()` and return the results as a list.

    `args` is a sequence whose first item is the callable and whose
    remaining items are the iterables handed to `map()`.
    """
    mapped = map(*args)
    return list(mapped)
40
41#
42# Code run by worker processes
43#
44
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """Main loop run by each pool worker.

    Reads ``(job, i, func, args, kwds)`` tasks from `inqueue`, calls
    ``func(*args, **kwds)`` and writes ``(job, i, (success, value))`` to
    `outqueue`, where `value` is the return value on success or the raised
    exception otherwise.

    The loop stops when a ``None`` sentinel is read, when reading raises
    EOFError/IOError, or after `maxtasks` tasks have been completed
    (``None`` means no limit).  If `initializer` is given it is called once
    with `initargs` before any task is processed.
    """
    assert maxtasks is None or (type(maxtasks) is int and maxtasks > 0)
    send_result = outqueue.put
    fetch_task = inqueue.get
    if hasattr(inqueue, '_writer'):
        # Close the pipe ends this side never uses.
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    unlimited = maxtasks is None
    completed = 0
    while unlimited or (maxtasks and completed < maxtasks):
        try:
            task = fetch_task()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        try:
            outcome = (True, func(*args, **kwds))
        except Exception as exc:
            # Ship the exception back instead of letting the worker die.
            outcome = (False, exc)
        send_result((job, i, outcome))
        completed += 1
    debug('worker exiting after %d tasks' % completed)
Benjamin Petersone711caf2008-06-11 16:44:04 +000076
77#
78# Class representing a process pool
79#
80
class Pool(object):
    '''
    Class which supports an async version of applying functions to arguments.

    A pool owns a list of worker processes plus three daemon helper threads:
    a worker handler (replaces exited workers), a task handler (moves tasks
    from `_taskqueue` to the workers' input queue) and a result handler
    (moves results from the output queue into the result objects held in
    `_cache`).
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None):
        '''
        Start the worker processes and the handler threads.

        `processes` defaults to `cpu_count()`, or 1 if that raises
        NotImplementedError.  `initializer(*initargs)` is run by each worker
        when it starts.  `maxtasksperchild` is passed to each worker as its
        task limit (None means unlimited).

        Raises TypeError if `initializer` is given but is not callable.
        '''
        self._setup_queues()
        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._processes = processes
        self._pool = []
        self._repopulate_pool()

        self._worker_handler = threading.Thread(
            target=Pool._handle_workers,
            args=(self, )
            )
        self._worker_handler.daemon = True
        self._worker_handler._state = RUN
        self._worker_handler.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # The finalizer receives everything it needs as arguments rather
        # than reading it from `self`.
        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._worker_handler, self._task_handler,
                  self._result_handler, self._cache),
            exitpriority=15
            )

    def _join_exited_workers(self):
        """Cleanup after any worker processes which have exited due to reaching
        their specified lifetime.  Returns True if any workers were cleaned up.
        """
        cleaned = False
        # Walk backwards so deleting an entry does not shift the indices
        # still to be visited.
        for i in reversed(range(len(self._pool))):
            worker = self._pool[i]
            if worker.exitcode is not None:
                # worker exited
                debug('cleaning up worker %d' % i)
                worker.join()
                cleaned = True
                del self._pool[i]
        return cleaned

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild)
                            )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')

    def _maintain_pool(self):
        """Clean up any exited workers and start replacements for them.
        """
        if self._join_exited_workers():
            self._repopulate_pool()

    def _setup_queues(self):
        '''
        Create the input/output queues and the fast put/get callables that
        the handler threads use on them.
        '''
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `func(*args, **kwds)`.
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Apply `func` to each element in `iterable`, collecting the results
        in a list that is returned.
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten back to single items.
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary.
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten back to single items.
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous version of `apply()` method.
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous version of `map()` method.
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Aim for about four chunks per worker, rounding the size up.
            chunksize, extra = divmod(len(iterable), len(self._pool) * 4)
            if extra:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_workers(pool):
        '''
        Body of the worker handler thread: call `pool._maintain_pool()`
        every 0.1 seconds for as long as both the thread and the pool are
        in the RUN state.
        '''
        while pool._worker_handler._state == RUN and pool._state == RUN:
            pool._maintain_pool()
            time.sleep(0.1)
        debug('worker handler exiting')

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        '''
        Body of the task handler thread: feed every task of every task
        sequence taken from `taskqueue` to `put` until a None sentinel is
        read, the thread leaves the RUN state, or `put` raises IOError.
        Afterwards send one None sentinel to `outqueue` and one per entry
        of `pool` through `put`.
        '''
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # The whole sequence was sent: report its length and move
                # on to the next sequence.
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # The inner loop was cut short, so stop handling tasks.
            break
        else:
            debug('task handler got sentinel')

        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        '''
        Body of the result handler thread: deliver each ``(job, i, obj)``
        read with `get` to ``cache[job]._set(i, obj)``.  After the first
        None sentinel, keep delivering until `cache` is empty or the thread
        is in the TERMINATE state.  Results for jobs no longer in `cache`
        are dropped.
        '''
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        '''
        Yield ``(func, chunk)`` pairs where each chunk is a tuple of up to
        `size` consecutive items of `it`; stop at the first empty chunk.
        '''
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Move a running pool to the CLOSE state and put the None sentinel on
        the task queue.  Does nothing if the pool is not in the RUN state.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._worker_handler._state = CLOSE
            self._taskqueue.put(None)

    def terminate(self):
        '''
        Mark the pool as TERMINATE and run the finalizer that shuts it down.
        '''
        debug('terminating pool')
        self._state = TERMINATE
        self._worker_handler._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for the handler threads and the worker processes to finish.
        `close()` or `terminate()` must have been called first.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._worker_handler.join()
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is taken and never released here;
        # presumably this keeps workers from reading further tasks while
        # the pool shuts down -- confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        '''
        Shut the pool down: flag the handler threads as TERMINATE, send
        them sentinels, terminate workers that are still running, then
        join the task handler, the result handler and the workers.
        '''
        # this is guaranteed to only be called once
        debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE
        taskqueue.put(None)                 # sentinel

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        # A thread cannot join itself (Thread.join raises RuntimeError), so
        # skip the join when this code is running inside that handler.
        current = threading.current_thread()

        debug('joining task handler')
        if current is not task_handler:
            task_handler.join()

        # This used to join task_handler a second time, leaving the result
        # handler thread unjoined.
        debug('joining result handler')
        if current is not result_handler:
            result_handler.join()

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    debug('cleaning up worker %d' % p.pid)
                    p.join()
Benjamin Petersone711caf2008-06-11 16:44:04 +0000455
456#
457# Class whose instances are returned by `Pool.apply_async()`
458#
459
class ApplyResult(object):
    '''
    Handle for the outcome of a single asynchronous call.

    On creation the object registers itself in `cache` under a fresh job
    id.  `_set()` stores the outcome, wakes waiting threads and removes the
    object from the cache again.
    '''

    def __init__(self, cache, callback):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        '''Return whether the outcome has been stored.'''
        return self._ready

    def successful(self):
        '''Return whether the call succeeded; the result must be ready.'''
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        '''Block until the result is ready or `timeout` seconds elapse.'''
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the stored value, waiting up to `timeout` seconds for it.

        Raises TimeoutError if the result is still not ready after the
        wait, and re-raises the stored exception if the call failed.
        '''
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if self._success:
            return self._value
        else:
            raise self._value

    def _set(self, i, obj):
        '''
        Store the ``(success, value)`` pair `obj`, call the callback with
        the value on success, and mark the result ready.  `i` is unused.
        '''
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            # Wake every waiter: notify() would release only one thread and
            # leave any other thread blocked in wait()/get() forever.
            self._cond.notify_all()
        del self._cache[self._job]
505
506#
507# Class whose instances are returned by `Pool.map_async()`
508#
509
class MapResult(ApplyResult):
    '''
    Result object that gathers the chunks of one map call into one list.

    The value is a list of `length` slots; chunk `i` fills the slice
    ``[i*chunksize:(i+1)*chunksize]``.  The result becomes ready once all
    chunks have arrived or as soon as one chunk reports a failure.
    '''

    def __init__(self, cache, chunksize, length, callback):
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            # Nothing will ever be delivered for this job, so it is ready
            # at once.  Drop it from the cache as well: otherwise the entry
            # stays forever and the result handler's `while cache ...`
            # drain loop never finishes.
            self._number_left = 0
            self._ready = True
            del cache[self._job]
        else:
            self._number_left = length//chunksize + bool(length % chunksize)

    def _mark_ready(self):
        '''Flag the result as ready and wake every waiting thread.'''
        with self._cond:
            self._ready = True
            # notify() would wake only one of several waiters.
            self._cond.notify_all()

    def _set(self, i, success_result):
        '''
        Record the ``(success, result)`` outcome of chunk number `i`.

        On success the chunk is copied into its slice; after the last chunk
        the callback (if any) is called with the full list.  On failure the
        value is replaced by `result` and the job finishes immediately.
        '''
        success, result = success_result
        if success:
            self._value[i*self._chunksize:(i+1)*self._chunksize] = result
            self._number_left -= 1
            if self._number_left == 0:
                if self._callback:
                    self._callback(self._value)
                del self._cache[self._job]
                self._mark_ready()
        else:
            self._success = False
            self._value = result
            del self._cache[self._job]
            self._mark_ready()
549
550#
551# Class whose instances are returned by `Pool.imap()`
552#
553
class IMapIterator(object):
    '''
    Iterator that hands out results in index order.

    `_set()` queues a result when its index is the next one expected and
    parks it in `_unsorted` otherwise.  `_set_length()` records the total
    number of results so that iteration can stop.
    '''

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()
        self._index = 0
        self._length = None
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        '''
        Return the next result, waiting up to `timeout` seconds for it.

        Raises StopIteration when all results have been consumed,
        TimeoutError when nothing arrived during the wait, and re-raises
        the stored exception for a failed item.
        '''
        with self._cond:
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            entry = self._items.popleft()

        succeeded, payload = entry
        if not succeeded:
            raise payload
        return payload

    __next__ = next                    # XXX

    def _set(self, i, obj):
        '''Accept result `obj` for index `i`.'''
        with self._cond:
            if i != self._index:
                # Arrived early: hold it until its turn comes.
                self._unsorted[i] = obj
            else:
                self._items.append(obj)
                self._index += 1
                # Release any parked results that are now in sequence.
                while self._index in self._unsorted:
                    self._items.append(self._unsorted.pop(self._index))
                    self._index += 1
                self._cond.notify()

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        '''Record the total number of results to expect.'''
        with self._cond:
            self._length = length
            if self._length == self._index:
                self._cond.notify()
                del self._cache[self._job]
622
623#
624# Class whose instances are returned by `Pool.imap_unordered()`
625#
626
class IMapUnorderedIterator(IMapIterator):
    '''
    Variant of `IMapIterator` that queues results in arrival order.
    '''

    def _set(self, i, obj):
        '''Queue `obj` straight away; the index `i` is ignored.'''
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            if self._index == self._length:
                del self._cache[self._job]
639
640#
641#
642#
643
class ThreadPool(Pool):
    '''
    Pool variant that uses the `Process` class from `.dummy` and plain
    `queue.Queue` objects for its input and output queues.
    '''

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes=processes, initializer=initializer,
                      initargs=initargs)

    def _setup_queues(self):
        '''Create the two queues and bind their put/get methods.'''
        self._inqueue, self._outqueue = queue.Queue(), queue.Queue()
        self._quick_put = self._inqueue.put
        self._quick_get = self._outqueue.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend([None] * size)
            inqueue.not_empty.notify_all()