blob: 1f366f035ee164bd599bceff16c35eb59b560f96 [file] [log] [blame]
Benjamin Peterson7f03ea72008-06-13 19:20:48 +00001#
2# Module providing the `Pool` class for managing a process pool
3#
4# multiprocessing/pool.py
5#
R. David Murray79af2452010-12-14 01:42:40 +00006# Copyright (c) 2006-2008, R Oudkerk
7# All rights reserved.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12#
13# 1. Redistributions of source code must retain the above copyright
14# notice, this list of conditions and the following disclaimer.
15# 2. Redistributions in binary form must reproduce the above copyright
16# notice, this list of conditions and the following disclaimer in the
17# documentation and/or other materials provided with the distribution.
18# 3. Neither the name of author nor the names of any contributors may be
19# used to endorse or promote products derived from this software
20# without specific prior written permission.
21#
22# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
23# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32# SUCH DAMAGE.
Benjamin Peterson7f03ea72008-06-13 19:20:48 +000033#
34
# Only `Pool` is exported by a star-import of this module; `ThreadPool`
# (defined at the bottom of the file) has to be imported by name.
__all__ = ['Pool']
36
37#
38# Imports
39#
40
41import threading
42import Queue
43import itertools
44import collections
45import time
46
47from multiprocessing import Process, cpu_count, TimeoutError
48from multiprocessing.util import Finalize, debug
49
50#
51# Constants representing the state of a pool
52#
53
# Lifecycle states stored in `Pool._state` and in the `_state` attribute of
# the handler threads.  RUN must remain 0 (falsy): the task and result
# handlers test `if thread._state:` to detect "no longer running".
RUN = 0
CLOSE = 1
TERMINATE = 2
57
58#
59# Miscellaneous
60#
61
# Module-wide source of job ids: every result object takes the next value
# as its `_job` key in the owning pool's cache.
job_counter = itertools.count()
63
def mapstar(args):
    '''
    Call the builtin `map()` with the elements of `args` as its arguments.

    `args` is normally a ``(func, chunk)`` pair produced by
    `Pool._get_tasks()`, so this applies `func` to every item of `chunk`.
    '''
    call_args = tuple(args)
    return map(*call_args)
66
67#
68# Code run by worker processes
69#
70
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    '''
    Main loop executed by every pool worker.

    Repeatedly takes a task ``(job, i, func, args, kwds)`` from `inqueue`,
    calls ``func(*args, **kwds)`` and puts ``(job, i, (success, value))`` on
    `outqueue`, where `value` is either the return value or the exception
    that `func` raised.

    The loop ends when a `None` sentinel is received, when reading from
    `inqueue` raises EOFError/IOError, or once `maxtasks` tasks have been
    completed (`None` means no limit).  If `initializer` is not None it is
    called once as ``initializer(*initargs)`` before the first task.

    Raises AssertionError if `maxtasks` is neither None nor a positive int.
    '''
    # Raise explicitly instead of using `assert` so the check is not stripped
    # under `python -O`; the exception type is unchanged for compatibility.
    if maxtasks is not None and not (isinstance(maxtasks, int)
                                     and maxtasks > 0):
        raise AssertionError('Maxtasks %r is not valid' % (maxtasks,))

    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        # Queues exposing pipe ends: close the ends this side never uses.
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    completed = 0
    while maxtasks is None or completed < maxtasks:
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            result = (False, e)

        try:
            put((job, i, result))
        except Exception as e:
            # `put` failed for this result (for instance because the queue
            # could not serialise it).  Report a failure for the job instead
            # of dying silently, which would leave the caller waiting forever.
            # If the queue itself is broken this second `put` raises again
            # and the worker exits as it did before.
            failure = RuntimeError(
                'Error sending result: %r. Reason: %r' % (result[1], e))
            debug('Possible encoding error while sending result: %s' % failure)
            put((job, i, (False, failure)))
        completed += 1
    debug('worker exiting after %d tasks' % completed)
Benjamin Peterson7f03ea72008-06-13 19:20:48 +0000102
103#
104# Class representing a process pool
105#
106
class Pool(object):
    '''
    Class which supports an async version of the `apply()` builtin

    Work submitted through `apply*()`, `map*()` and `imap*()` is placed on an
    internal task queue.  Three daemon threads service the pool: the worker
    handler replaces workers that have exited, the task handler feeds tasks
    to the workers, and the result handler delivers results to the result
    objects registered in `self._cache`.
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None):
        '''
        Start `processes` workers (default: `cpu_count()`, or 1 if that is
        not implemented) and the handler threads.

        `initializer(*initargs)` is run by each worker when it starts, and
        `maxtasksperchild` is passed to each worker as its task limit.

        Raises ValueError if `processes` is less than 1 and TypeError if
        `initializer` is given but is not callable.
        '''
        # Validate the arguments before creating queues, processes or
        # threads, so a rejected call leaves nothing behind.
        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            # A pool without workers can never complete a task.
            raise ValueError('Number of processes must be at least 1')

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        self._processes = processes
        self._pool = []
        self._repopulate_pool()

        self._worker_handler = threading.Thread(
            target=Pool._handle_workers,
            args=(self, )
            )
        self._worker_handler.daemon = True
        self._worker_handler._state = RUN
        self._worker_handler.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._worker_handler, self._task_handler,
                  self._result_handler, self._cache),
            exitpriority=15
            )

    def _join_exited_workers(self):
        """Cleanup after any worker processes which have exited due to reaching
        their specified lifetime.  Returns True if any workers were cleaned up.
        """
        cleaned = False
        # Walk the indices backwards so deleting an entry does not shift the
        # positions that are still to be visited.
        for i in reversed(range(len(self._pool))):
            worker = self._pool[i]
            if worker.exitcode is not None:
                # worker exited
                debug('cleaning up worker %d' % i)
                worker.join()
                cleaned = True
                del self._pool[i]
        return cleaned

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild)
                            )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')

    def _maintain_pool(self):
        """Clean up any exited workers and start replacements for them.
        """
        if self._join_exited_workers():
            self._repopulate_pool()

    def _setup_queues(self):
        '''
        Create the task and result queues and the raw send/receive shortcuts
        used by the handler threads.
        '''
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Equivalent of `map()` builtin
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten back to single items.
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten back to single items.
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Divide by the configured worker count rather than
            # len(self._pool): the worker handler thread can momentarily
            # shrink that list (even to empty) while it reaps exited
            # workers, which would raise ZeroDivisionError here.
            chunksize, extra = divmod(len(iterable), self._processes * 4)
            if extra:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_workers(pool):
        '''
        Body of the worker handler thread: keep replacing exited workers.
        '''
        thread = threading.current_thread()

        # Keep maintaining workers after close() until every outstanding job
        # has left the cache; otherwise workers that exit (e.g. after
        # reaching their task limit) would not be replaced and the remaining
        # tasks would never run.  Stop immediately on TERMINATE.
        while thread._state == RUN or (pool._cache and
                                       thread._state != TERMINATE):
            pool._maintain_pool()
            time.sleep(0.1)
        # Only now tell the task handler to finish; it forwards the sentinel
        # to the result handler and to the workers.
        pool._taskqueue.put(None)
        debug('worker handler exiting')

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        '''
        Body of the task handler thread: move tasks from `taskqueue` to the
        workers with `put` until a `None` sentinel arrives or the thread's
        `_state` leaves RUN, then send sentinels to the result handler and
        to the workers.
        '''
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # The whole sequence was sent; report how many tasks it had.
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            break
        else:
            debug('task handler got sentinel')


        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        '''
        Body of the result handler thread: read ``(job, i, obj)`` results
        with `get` and pass them to ``cache[job]._set(i, obj)``.
        '''
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                # No result object is registered for this job any more.
                pass

        # After the sentinel, keep delivering results until every job has
        # been removed from the cache (or the pool is terminated).
        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        '''
        Yield ``(func, chunk)`` pairs where each `chunk` is a tuple of up to
        `size` consecutive items of `it`; stops at the first empty chunk.
        '''
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Stop accepting new work.  The worker handler thread sends the
        shutdown sentinel itself once all outstanding jobs have completed.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._worker_handler._state = CLOSE

    def terminate(self):
        '''
        Stop the pool immediately by running the finalizer
        (`_terminate_pool`).
        '''
        debug('terminating pool')
        self._state = TERMINATE
        self._worker_handler._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for the handler threads and the workers to exit.  `close()` or
        `terminate()` must have been called first.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._worker_handler.join()
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is acquired and never released here --
        # presumably on purpose, so workers can take no further tasks
        # during termination; confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        # this is guaranteed to only be called once
        debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE
        taskqueue.put(None)                 # sentinel

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        # Wait for the worker handler to exit before terminating workers so
        # that it cannot start replacement workers behind our back.
        debug('joining worker handler')
        if threading.current_thread() is not worker_handler:
            worker_handler.join(1e100)

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    debug('cleaning up worker %d' % p.pid)
                    p.join()
481#
482# Class whose instances are returned by `Pool.apply_async()`
483#
484
class ApplyResult(object):
    '''
    Handle for the outcome of a single asynchronous call.

    On creation the object registers itself in `cache` under a fresh job
    id; `_set()` stores the outcome, wakes up waiters and removes the
    registration again.
    '''

    def __init__(self, cache, callback):
        self._cond = threading.Condition(threading.Lock())
        # Builtin next() works on every iterator, unlike the `.next()` method.
        self._job = next(job_counter)
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        '''Return whether the outcome has been stored.'''
        return self._ready

    def successful(self):
        '''
        Return whether the call completed without raising.

        Raises AssertionError if the outcome is not available yet.
        '''
        # Explicit raise instead of `assert`: under `python -O` the assert is
        # removed and the next line would fail with AttributeError instead.
        if not self._ready:
            raise AssertionError('result is not ready')
        return self._success

    def wait(self, timeout=None):
        '''Block until the outcome is available or `timeout` has elapsed.'''
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the call's value, re-raising its exception if it failed.

        Raises TimeoutError if no outcome arrived within `timeout`.
        '''
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if self._success:
            return self._value
        else:
            raise self._value

    def _set(self, i, obj):
        '''
        Store the ``(success, value)`` pair `obj`; `i` is unused here.

        The callback, if any, is only invoked for successful calls and runs
        before waiters are notified.
        '''
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            self._cond.notify()
        del self._cache[self._job]
530
531#
532# Class whose instances are returned by `Pool.map_async()`
533#
534
class MapResult(ApplyResult):
    '''
    Result of a chunked map: collects per-chunk results into one list of
    `length` items and becomes ready when every chunk has arrived or when
    the first chunk reports a failure.
    '''

    def __init__(self, cache, chunksize, length, callback):
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            self._number_left = 0
            self._ready = True
            # No chunk will ever arrive, so `_set()` never runs to remove
            # this job from the cache.  Remove it here; a leftover entry
            # keeps the cache non-empty forever, and the pool's result
            # handler waits for the cache to drain before exiting.
            del cache[self._job]
        else:
            # Number of chunks, rounding up for a final partial chunk.
            self._number_left = length//chunksize + bool(length % chunksize)

    def _set(self, i, success_result):
        '''
        Record the ``(success, result)`` outcome of chunk number `i`.

        A successful chunk is copied into its slice of the value list; the
        callback fires once the last chunk is in.  A failed chunk makes the
        whole result fail immediately with that chunk's exception.
        '''
        success, result = success_result
        if success:
            self._value[i*self._chunksize:(i+1)*self._chunksize] = result
            self._number_left -= 1
            if self._number_left == 0:
                if self._callback:
                    self._callback(self._value)
                del self._cache[self._job]
                with self._cond:
                    self._ready = True
                    self._cond.notify()

        else:
            self._success = False
            self._value = result
            del self._cache[self._job]
            with self._cond:
                self._ready = True
                self._cond.notify()
574
575#
576# Class whose instances are returned by `Pool.imap()`
577#
578
class IMapIterator(object):
    '''
    Iterator that yields task results in index order as they arrive.

    Results that arrive early are parked in `_unsorted` until all lower
    indices have been delivered.  The iterator registers itself in `cache`
    and removes itself once the number of delivered results equals the
    length announced through `_set_length()`.
    '''

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        # Builtin next() works on every iterator, unlike the `.next()` method.
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()
        self._index = 0
        self._length = None
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        '''
        Return the next result, waiting up to `timeout` for it to arrive.

        Raises StopIteration when all results have been consumed,
        TimeoutError if nothing arrived in time, and re-raises the task's
        exception if the task failed.
        '''
        with self._cond:
            try:
                item = self._items.popleft()
            except IndexError:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                try:
                    item = self._items.popleft()
                except IndexError:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError

        success, value = item
        if success:
            return value
        raise value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        '''Accept result `obj` for index `i`, releasing any parked results
        that have now become consecutive.'''
        with self._cond:
            if self._index == i:
                self._items.append(obj)
                self._index += 1
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.append(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        '''Record the total number of results to expect.'''
        with self._cond:
            self._length = length
            if self._index == self._length:
                # Everything was already delivered: wake a blocked consumer
                # so it can see the end of the iteration.
                self._cond.notify()
                del self._cache[self._job]
647
648#
649# Class whose instances are returned by `Pool.imap_unordered()`
650#
651
class IMapUnorderedIterator(IMapIterator):
    '''
    Variant of `IMapIterator` that hands results out in arrival order.
    '''

    def _set(self, i, obj):
        '''Queue `obj` immediately; the index `i` is ignored.'''
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            finished = self._index == self._length
            if finished:
                del self._cache[self._job]
663 self._cond.release()
664
665#
666#
667#
668
class ThreadPool(Pool):
    '''
    Pool variant whose workers are created from the `Process` class of the
    sibling `dummy` module and which uses `Queue.Queue` objects for the task
    and result queues.
    '''

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        '''Create the in/out queues and bind their put/get shortcuts.'''
        inqueue = Queue.Queue()
        outqueue = Queue.Queue()
        self._inqueue = inqueue
        self._outqueue = outqueue
        self._quick_put = inqueue.put
        self._quick_get = outqueue.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend([None] * size)
            inqueue.not_empty.notify_all()