#
# Module providing the `Pool` class for managing a process pool
#
# multiprocessing/pool.py
#
# Copyright (c) 2006-2008, R Oudkerk
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of author nor the names of any contributors may be
#    used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

__all__ = ['Pool']

#
# Imports
#

import threading
import Queue
import itertools
import collections
import time

from multiprocessing import Process, cpu_count, TimeoutError
from multiprocessing.util import Finalize, debug

#
# Constants representing the state of a pool
#

RUN = 0
CLOSE = 1
TERMINATE = 2

#
# Miscellaneous
#

job_counter = itertools.count()

def mapstar(args):
    return map(*args)

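# A worked illustration of mapstar() (a sketch added for clarity, not
# part of the original source): each map-style task bundles a function
# with a chunk of arguments, so
#
#     mapstar((abs, (-1, -2, 3)))    # -> [1, 2, 3]
#
# which is just map(abs, (-1, -2, 3)) run inside a worker process.
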
#
# Code run by worker processes
#

def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    assert maxtasks is None or (type(maxtasks) == int and maxtasks > 0)
    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    completed = 0
    while maxtasks is None or (maxtasks and completed < maxtasks):
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception, e:
            result = (False, e)
        put((job, i, result))
        completed += 1
    debug('worker exiting after %d tasks' % completed)

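# A note on the wire format used above (added for clarity; the tuples
# themselves are from the original code): each task is
# (job, i, func, args, kwds) and each result is
# (job, i, (success, value)).  For example, the task
#
#     (42, 0, pow, (2, 10), {})
#
# comes back on outqueue as (42, 0, (True, 1024)); if func raises, the
# exception instance travels back as (job, i, (False, exc)).
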
#
# Class representing a process pool
#

class Pool(object):
    '''
    Class which supports an async version of the `apply()` builtin
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None):
        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            raise ValueError("Number of processes must be at least 1")

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._processes = processes
        self._pool = []
        self._repopulate_pool()

        self._worker_handler = threading.Thread(
            target=Pool._handle_workers,
            args=(self, )
            )
        self._worker_handler.daemon = True
        self._worker_handler._state = RUN
        self._worker_handler.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._worker_handler, self._task_handler,
                  self._result_handler, self._cache),
            exitpriority=15
            )

    def _join_exited_workers(self):
        """Cleanup after any worker processes which have exited due to reaching
        their specified lifetime.  Returns True if any workers were cleaned up.
        """
        cleaned = False
        for i in reversed(range(len(self._pool))):
            worker = self._pool[i]
            if worker.exitcode is not None:
                # worker exited
                debug('cleaning up worker %d' % i)
                worker.join()
                cleaned = True
                del self._pool[i]
        return cleaned

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild)
                            )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')

    def _maintain_pool(self):
        """Clean up any exited workers and start replacements for them.
        """
        if self._join_exited_workers():
            self._repopulate_pool()

    def _setup_queues(self):
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

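    # Relationship between the blocking and async calls (an illustrative
    # sketch, not part of the original source; assumes a pool `p` and a
    # picklable module-level function `f`):
    #
    #     p.apply(f, (10,))               # blocks; same as apply_async(...).get()
    #     res = p.apply_async(f, (10,))   # returns an ApplyResult immediately
    #     res.get(timeout=1)              # blocks up to 1s, then TimeoutError
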
    def map(self, func, iterable, chunksize=None):
        '''
        Equivalent of `map()` builtin
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            chunksize, extra = divmod(len(iterable), len(self._pool) * 4)
            if extra:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

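    # How the default chunksize above works out in practice (a worked
    # example added for clarity): with len(iterable) == 100 and a pool
    # of 4 workers, divmod(100, 4 * 4) gives (6, 4), so the nonzero
    # remainder bumps chunksize to 7 and the 100 items become 15 tasks
    # of at most 7 items each.
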
    @staticmethod
    def _handle_workers(pool):
        thread = threading.current_thread()

        # Keep maintaining workers until the cache gets drained, unless the pool
        # is terminated.
        while thread._state == RUN or (pool._cache and thread._state != TERMINATE):
            pool._maintain_pool()
            time.sleep(0.1)
        # send sentinel to stop workers
        pool._taskqueue.put(None)
        debug('worker handler exiting')

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            break
        else:
            debug('task handler got sentinel')

        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

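    # Chunking behaviour of _get_tasks() (an illustrative sketch added
    # for clarity):
    #
    #     list(Pool._get_tasks(f, [0, 1, 2, 3, 4], 2))
    #     # -> [(f, (0, 1)), (f, (2, 3)), (f, (4,))]
    #
    # Each (func, chunk) tuple is later executed by a worker via mapstar().
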
    def __reduce__(self):
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._worker_handler._state = CLOSE

    def terminate(self):
        debug('terminating pool')
        self._state = TERMINATE
        self._worker_handler._state = TERMINATE
        self._terminate()

    def join(self):
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._worker_handler.join()
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        # this is guaranteed to only be called once
        debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        # We must wait for the worker handler to exit before terminating
        # workers because we don't want workers to be restarted behind our back.
        debug('joining worker handler')
        worker_handler.join()

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    debug('cleaning up worker %d' % p.pid)
                    p.join()

#
# Class whose instances are returned by `Pool.apply_async()`
#

class ApplyResult(object):

    def __init__(self, cache, callback):
        self._cond = threading.Condition(threading.Lock())
        self._job = job_counter.next()
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        return self._ready

    def successful(self):
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        self._cond.acquire()
        try:
            if not self._ready:
                self._cond.wait(timeout)
        finally:
            self._cond.release()

    def get(self, timeout=None):
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if self._success:
            return self._value
        else:
            raise self._value

    def _set(self, i, obj):
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        self._cond.acquire()
        try:
            self._ready = True
            self._cond.notify()
        finally:
            self._cond.release()
        del self._cache[self._job]

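# Lifecycle note (added for clarity): each ApplyResult registers itself
# in the pool's cache under a fresh job id, and the result handler
# thread routes every incoming (job, i, obj) tuple to
# cache[job]._set(i, obj); _set() then fires the callback on success,
# wakes any get()/wait() callers, and removes the entry from the cache.
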
#
# Class whose instances are returned by `Pool.map_async()`
#

class MapResult(ApplyResult):

    def __init__(self, cache, chunksize, length, callback):
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            self._number_left = 0
            self._ready = True
        else:
            self._number_left = length//chunksize + bool(length % chunksize)

    def _set(self, i, success_result):
        success, result = success_result
        if success:
            self._value[i*self._chunksize:(i+1)*self._chunksize] = result
            self._number_left -= 1
            if self._number_left == 0:
                if self._callback:
                    self._callback(self._value)
                del self._cache[self._job]
                self._cond.acquire()
                try:
                    self._ready = True
                    self._cond.notify()
                finally:
                    self._cond.release()

        else:
            self._success = False
            self._value = result
            del self._cache[self._job]
            self._cond.acquire()
            try:
                self._ready = True
                self._cond.notify()
            finally:
                self._cond.release()

#
# Class whose instances are returned by `Pool.imap()`
#

class IMapIterator(object):

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        self._job = job_counter.next()
        self._cache = cache
        self._items = collections.deque()
        self._index = 0
        self._length = None
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        self._cond.acquire()
        try:
            try:
                item = self._items.popleft()
            except IndexError:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                try:
                    item = self._items.popleft()
                except IndexError:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
        finally:
            self._cond.release()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        self._cond.acquire()
        try:
            if self._index == i:
                self._items.append(obj)
                self._index += 1
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.append(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            if self._index == self._length:
                del self._cache[self._job]
        finally:
            self._cond.release()

    def _set_length(self, length):
        self._cond.acquire()
        try:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
        finally:
            self._cond.release()

#
# Class whose instances are returned by `Pool.imap_unordered()`
#

class IMapUnorderedIterator(IMapIterator):

    def _set(self, i, obj):
        self._cond.acquire()
        try:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            if self._index == self._length:
                del self._cache[self._job]
        finally:
            self._cond.release()

#
# Class representing a pool of worker threads; it reuses the `Pool`
# machinery but drives `multiprocessing.dummy.Process` (i.e. threads)
#

class ThreadPool(Pool):

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        self._inqueue = Queue.Queue()
        self._outqueue = Queue.Queue()
        self._quick_put = self._inqueue.put
        self._quick_get = self._outqueue.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # put sentinels at head of inqueue to make workers finish
        inqueue.not_empty.acquire()
        try:
            inqueue.queue.clear()
            inqueue.queue.extend([None] * size)
            inqueue.not_empty.notify_all()
        finally:
            inqueue.not_empty.release()
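
#
# A minimal usage sketch (an illustrative addition, not part of the
# original module; `f` is a hypothetical function which must live at
# module level so that child processes can unpickle it by name):
#
#     from multiprocessing import Pool
#
#     def f(x):
#         return x*x
#
#     if __name__ == '__main__':
#         pool = Pool(processes=4)              # start 4 worker processes
#         print pool.map(f, range(10))          # [0, 1, 4, ..., 81]
#         res = pool.apply_async(f, (20,))      # evaluate f(20) asynchronously
#         print res.get(timeout=1)              # 400
#         pool.close()
#         pool.join()
#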