blob: 862a60e92cfae97d6902e29d495c42bf9822ad0e [file] [log] [blame]
Benjamin Peterson7f03ea72008-06-13 19:20:48 +00001#
2# Module providing the `Pool` class for managing a process pool
3#
4# multiprocessing/pool.py
5#
R. David Murray79af2452010-12-14 01:42:40 +00006# Copyright (c) 2006-2008, R Oudkerk
7# All rights reserved.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12#
13# 1. Redistributions of source code must retain the above copyright
14# notice, this list of conditions and the following disclaimer.
15# 2. Redistributions in binary form must reproduce the above copyright
16# notice, this list of conditions and the following disclaimer in the
17# documentation and/or other materials provided with the distribution.
18# 3. Neither the name of author nor the names of any contributors may be
19# used to endorse or promote products derived from this software
20# without specific prior written permission.
21#
22# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
23# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32# SUCH DAMAGE.
Benjamin Peterson7f03ea72008-06-13 19:20:48 +000033#
34
# Names exported by `from <this module> import *`; only `Pool` is listed.
__all__ = ['Pool']
36
37#
38# Imports
39#
40
41import threading
42import Queue
43import itertools
44import collections
45import time
46
47from multiprocessing import Process, cpu_count, TimeoutError
48from multiprocessing.util import Finalize, debug
49
50#
51# Constants representing the state of a pool
52#
53
# Lifecycle states used for a pool and for its handler threads.  RUN must
# remain 0 (falsy): the handler loops test `thread._state` for truthiness
# to detect "no longer running".
RUN, CLOSE, TERMINATE = range(3)
57
58#
59# Miscellaneous
60#
61
# Module-wide counter.  The result classes in this file take one value from
# it in their constructors and use that value as their key in the pool's
# result cache.
job_counter = itertools.count()
63
def mapstar(args):
    """Call the builtin `map()` with the packed arguments in `args`.

    `args` is an iterable whose first element is the function and whose
    remaining elements are the iterables to map over; this lets a whole
    chunk of work travel through the task queue as a single argument.
    """
    func_and_iterables = tuple(args)
    return map(*func_and_iterables)
66
67#
68# Code run by worker processes
69#
70
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """Main loop executed by each pool worker.

    Repeatedly takes a task ``(job, i, func, args, kwds)`` from `inqueue`,
    calls ``func(*args, **kwds)`` and puts ``(job, i, (success, value))``
    on `outqueue`, where `value` is the return value on success or the
    raised exception on failure.

    inqueue     -- object with a `get()` method that yields tasks
    outqueue    -- object with a `put()` method that accepts results
    initializer -- optional callable run once before the first task
    initargs    -- positional arguments for `initializer`
    maxtasks    -- None for no limit, otherwise a positive int: the
                   number of tasks after which the worker returns

    The loop ends when `get()` raises EOFError/IOError, when the task is
    the sentinel `None`, or once `maxtasks` tasks have been completed.
    """
    assert maxtasks is None or (isinstance(maxtasks, int) and maxtasks > 0)
    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        # Pipe-backed queues: close the ends this side never uses.
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    completed = 0
    while maxtasks is None or completed < maxtasks:
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            result = (False, e)

        try:
            put((job, i, result))
        except Exception as e:
            # The result itself could not be sent (for instance it cannot
            # be serialised).  Report that as a failed task instead of
            # letting the worker die, which would leave whoever waits on
            # this job blocked forever.  If this second put() fails as
            # well, the exception propagates as it did before.
            wrapped = RuntimeError(
                "Error sending result: %r. Reason: %r" % (result[1], e))
            debug('Possible encoding error while sending result: %s'
                  % wrapped)
            put((job, i, (False, wrapped)))
        completed += 1
    debug('worker exiting after %d tasks' % completed)
Benjamin Peterson7f03ea72008-06-13 19:20:48 +0000102
103#
104# Class representing a process pool
105#
106
class Pool(object):
    '''
    Class which supports an async version of the `apply()` builtin

    A pool owns a list of worker processes plus three daemon helper
    threads: one that replaces exited workers, one that feeds tasks from
    an internal task queue to the workers, and one that routes results
    coming back from the workers to the matching result object held in
    `self._cache`.
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None):
        '''
        Create the queues, start the workers and the helper threads.

        processes        -- number of workers; None means `cpu_count()`
                            (or 1 when that is not implemented)
        initializer      -- optional callable run by each worker on start
        initargs         -- positional arguments for `initializer`
        maxtasksperchild -- passed to each worker as its task limit

        Raises ValueError if `processes` is smaller than 1 and TypeError
        if `initializer` is given but is not callable.
        '''
        # Validate the arguments before allocating any queue, thread or
        # process, so that a bad call leaves nothing behind.
        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            # A pool without workers could never complete a task.
            raise ValueError('Number of processes must be at least 1')

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        self._processes = processes
        self._pool = []
        self._repopulate_pool()

        self._worker_handler = threading.Thread(
            target=Pool._handle_workers,
            args=(self, )
            )
        self._worker_handler.daemon = True
        self._worker_handler._state = RUN
        self._worker_handler.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # Finalizer that `terminate()` invokes; it receives everything it
        # needs as arguments rather than through `self`.
        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._worker_handler, self._task_handler,
                  self._result_handler, self._cache),
            exitpriority=15
            )

    def _join_exited_workers(self):
        """Cleanup after any worker processes which have exited due to reaching
        their specified lifetime.  Returns True if any workers were cleaned up.
        """
        cleaned = False
        # Walk backwards so deleting an entry does not shift the indices
        # that are still to be visited.
        for i in reversed(range(len(self._pool))):
            worker = self._pool[i]
            if worker.exitcode is not None:
                # worker exited
                debug('cleaning up worker %d' % i)
                worker.join()
                cleaned = True
                del self._pool[i]
        return cleaned

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild)
                            )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')

    def _maintain_pool(self):
        """Clean up any exited workers and start replacements for them.
        """
        if self._join_exited_workers():
            self._repopulate_pool()

    def _setup_queues(self):
        """Create the task/result queues and bind the fast put/get callables
        used by the handler threads."""
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Equivalent of `map()` builtin
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten the chunks back into
            # individual items for the caller.
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Aim for about four chunks per worker.  The configured pool
            # size is used instead of len(self._pool) because the worker
            # handler thread removes exited workers from that list before
            # it starts their replacements, so the list can briefly be
            # short or even empty, which would make this division fail.
            chunksize, extra = divmod(len(iterable), self._processes * 4)
            if extra:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_workers(pool):
        """Thread target: keep the pool at full strength while it is running,
        then tell the task handler to stop."""
        while pool._worker_handler._state == RUN and pool._state == RUN:
            pool._maintain_pool()
            time.sleep(0.1)
        # send sentinel to stop workers
        pool._taskqueue.put(None)
        debug('worker handler exiting')

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        """Thread target: move tasks from `taskqueue` to the workers via `put`.

        Each item on `taskqueue` is a pair ``(taskseq, set_length)``; a
        `None` item is the sentinel that ends the loop.  Afterwards one
        sentinel is sent to the result handler and one per pool member to
        the workers.
        """
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # The whole sequence was sent: report how many tasks it
                # contained, then go on to the next sequence.
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # Inner loop was interrupted: stop handling tasks altogether.
            break
        else:
            debug('task handler got sentinel')


        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        """Thread target: deliver each ``(job, i, obj)`` read with `get` to
        ``cache[job]._set(i, obj)``.

        Results for jobs no longer present in `cache` are dropped.  After
        the first sentinel the loop keeps draining results until `cache`
        is empty or the thread state becomes TERMINATE.
        """
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        """Yield ``(func, chunk)`` pairs where each chunk is a tuple of up to
        `size` consecutive items of `it`; stops at the first empty chunk."""
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Move a running pool to the CLOSE state; the worker handler thread
        notices the change and ends its loop.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._worker_handler._state = CLOSE

    def terminate(self):
        '''
        Move the pool to the TERMINATE state and run its finalizer.
        '''
        debug('terminating pool')
        self._state = TERMINATE
        self._worker_handler._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for the helper threads and the workers to finish.  The pool
        must already be in the CLOSE or TERMINATE state.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._worker_handler.join()
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        """Drain `inqueue` while the task handler is alive so that a blocked
        put on that queue can complete."""
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is acquired and not released in this
        # function -- presumably on purpose, so that workers cannot take
        # further tasks during termination; confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        """Finalizer body: stop the helper threads, terminate workers that
        are still running and join everything."""
        # this is guaranteed to only be called once
        debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        # We must wait for the worker handler to exit before terminating
        # workers because we don't want workers to be restarted behind our back.
        debug('joining worker handler')
        worker_handler.join()

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    debug('cleaning up worker %d' % p.pid)
                    p.join()
Benjamin Peterson7f03ea72008-06-13 19:20:48 +0000485
486#
487# Class whose instances are returned by `Pool.apply_async()`
488#
489
class ApplyResult(object):
    """Handle for the outcome of a single asynchronous call.

    On construction the object takes a fresh job id from `job_counter`
    and stores itself in `cache` under that id; `_set()` removes it
    again once the outcome has been recorded.
    """

    def __init__(self, cache, callback):
        """`cache` maps job ids to pending result objects; `callback`, if
        true, is called with the value of a successful result."""
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        """Return whether the outcome has been recorded."""
        return self._ready

    def successful(self):
        """Return whether the call completed without raising.  Asserts that
        the result is ready."""
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        """Block until the result is ready or `timeout` has elapsed."""
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        """Return the value of the call, re-raising its exception if it
        failed.  Raises TimeoutError if the result is not ready after
        waiting for `timeout`."""
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if self._success:
            return self._value
        else:
            raise self._value

    def _set(self, i, obj):
        """Record the outcome `obj`, a ``(success, value)`` pair.  `i` is
        unused here."""
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            # Several threads may be blocked in wait()/get() on the same
            # result; wake all of them, not just one.
            self._cond.notify_all()
        del self._cache[self._job]
535
536#
537# Class whose instances are returned by `Pool.map_async()`
538#
539
class MapResult(ApplyResult):
    """Result object that collects the chunked results of one map call.

    The values arrive chunk by chunk through `_set()` and are stored at
    their positions in a list of `length` items; the result becomes ready
    when the last chunk has arrived or as soon as one chunk fails.
    """

    def __init__(self, cache, chunksize, length, callback):
        """`chunksize` is the number of items per chunk and `length` the
        total number of items; a `chunksize` of zero or less means there
        is nothing to wait for."""
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            self._number_left = 0
            self._ready = True
            # No chunk will ever be delivered, so _set() will never run
            # and remove this entry.  Remove it here; otherwise the cache
            # never becomes empty and the pool's result handler (and with
            # it Pool.join()) would wait forever.
            del cache[self._job]
        else:
            # Number of chunks, rounding up for a final partial chunk.
            self._number_left = length//chunksize + bool(length % chunksize)

    def _set(self, i, success_result):
        """Store chunk number `i`; `success_result` is ``(success, result)``
        where `result` is the chunk's values or the exception raised."""
        success, result = success_result
        if success:
            self._value[i*self._chunksize:(i+1)*self._chunksize] = result
            self._number_left -= 1
            if self._number_left == 0:
                if self._callback:
                    self._callback(self._value)
                del self._cache[self._job]
                with self._cond:
                    self._ready = True
                    # Wake every thread blocked in wait()/get().
                    self._cond.notify_all()

        else:
            # First failure wins: the exception replaces the value list and
            # the job is dropped from the cache at once.
            self._success = False
            self._value = result
            del self._cache[self._job]
            with self._cond:
                self._ready = True
                self._cond.notify_all()
579
580#
581# Class whose instances are returned by `Pool.imap()`
582#
583
class IMapIterator(object):
    """Iterator that hands out results in task order as they arrive.

    Results delivered out of order are parked in `_unsorted` until every
    earlier index has been delivered.  `_length` stays None until
    `_set_length()` announces the total number of tasks.
    """

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()
        self._index = 0
        self._length = None
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        """Return the next value, re-raising it if the task failed.

        Raises StopIteration once all results have been consumed and
        TimeoutError if nothing is available after one wait of `timeout`.
        """
        with self._cond:
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            entry = self._items.popleft()

        ok, payload = entry
        if not ok:
            raise payload
        return payload

    __next__ = next                    # XXX

    def _set(self, i, obj):
        """Accept result `obj` for task index `i`."""
        with self._cond:
            if i != self._index:
                # Arrived early: hold it back until its turn comes.
                self._unsorted[i] = obj
            else:
                self._items.append(obj)
                self._index += 1
                # Release any parked results that are now in sequence.
                while self._index in self._unsorted:
                    self._items.append(self._unsorted.pop(self._index))
                    self._index += 1
                self._cond.notify()

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        """Record the total number of tasks; if all results are already in,
        wake a waiter and drop this job from the cache."""
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
652
653#
654# Class whose instances are returned by `Pool.imap_unordered()`
655#
656
class IMapUnorderedIterator(IMapIterator):
    """Variant of `IMapIterator` that yields results in arrival order."""

    def _set(self, i, obj):
        """Queue `obj` immediately; the task index `i` is ignored."""
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            if self._index == self._length:
                del self._cache[self._job]
669
670#
671#
672#
673
class ThreadPool(Pool):
    """Pool variant whose `Process` class comes from the `dummy` submodule
    and whose queues are `Queue.Queue` objects."""

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        super(ThreadPool, self).__init__(processes, initializer, initargs)

    def _setup_queues(self):
        """Create the in/out queues and bind their put/get methods."""
        inbound = Queue.Queue()
        outbound = Queue.Queue()
        self._inqueue = inbound
        self._outqueue = outbound
        self._quick_put = inbound.put
        self._quick_get = outbound.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        """Replace whatever is queued with `size` sentinels and wake all
        consumers waiting on the queue."""
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend(itertools.repeat(None, size))
            inqueue.not_empty.notify_all()