#
# Module providing the `Pool` class for managing a process pool
#
# multiprocessing/pool.py
#
# Copyright (c) 2006-2008, R Oudkerk
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of author nor the names of any contributors may be
#    used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
34
# Names exported by `from <this module> import *`.
__all__ = ['Pool']
36
37#
38# Imports
39#
40
41import threading
42import Queue
43import itertools
44import collections
45import time
46
47from multiprocessing import Process, cpu_count, TimeoutError
48from multiprocessing.util import Finalize, debug
49
50#
51# Constants representing the state of a pool
52#
53
# Lifecycle states shared by a pool and its handler threads.  RUN must stay
# falsy: the handler loops test `thread._state` for truthiness to detect
# that they have been asked to stop.
RUN, CLOSE, TERMINATE = range(3)
57
58#
59# Miscellaneous
60#
61
# Source of the job ids that result objects use as their key in a pool's
# cache; counts upwards from zero.
job_counter = itertools.count(0)
63
def mapstar(args):
    """Call `map()` with its arguments unpacked from the sequence `args`.

    The pool uses this as the task function for chunked map jobs, where
    `args` is a `(func, chunk)` pair.
    """
    call_args = tuple(args)
    return map(*call_args)
66
67#
68# Code run by worker processes
69#
70
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """Main loop run by each pool worker.

    Tasks are read from `inqueue` as `(job, i, func, args, kwds)` tuples and
    answered on `outqueue` with `(job, i, (success, value))`, where `value`
    is the return value of `func(*args, **kwds)` or the exception it raised.

    The loop ends when a `None` sentinel is received, when reading from
    `inqueue` raises EOFError/IOError, or after `maxtasks` tasks have been
    completed.

    initializer -- optional callable invoked once as `initializer(*initargs)`
                   before any task is processed
    maxtasks    -- `None` for no limit, otherwise a positive int

    Raises AssertionError if `maxtasks` is neither `None` nor a positive int.
    """
    # Explicit raise (rather than `assert`) so the check survives `python -O`.
    if maxtasks is not None and not (isinstance(maxtasks, int)
                                     and maxtasks > 0):
        raise AssertionError("Maxtasks %r is not valid" % (maxtasks,))

    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        # Pipe-backed queues: close the ends this side never uses.
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    completed = 0
    while maxtasks is None or completed < maxtasks:
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            result = (False, e)

        try:
            put((job, i, result))
        except Exception as e:
            # The result could not be sent (for example it cannot be
            # serialized).  Report a failure for this job instead of dying,
            # otherwise the caller would wait for the result forever.  If
            # this second send fails too, the error propagates as before.
            debug('worker could not send result: %r' % (e,))
            failure = RuntimeError(
                'Error sending result for job %r: %r' % (job, e))
            put((job, i, (False, failure)))
        completed += 1
    debug('worker exiting after %d tasks' % completed)
Benjamin Peterson7f03ea72008-06-13 19:20:48 +0000102
103#
104# Class representing a process pool
105#
106
class Pool(object):
    '''
    Class which supports an async version of the `apply()` builtin

    Submitted work is placed on an internal task queue.  Three daemon
    threads service the pool: the worker handler replaces worker processes
    that have exited, the task handler forwards queued tasks to the
    workers, and the result handler delivers results to the result objects
    registered in the job cache.
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None):
        '''
        Create the queues, worker processes and handler threads.

        processes        -- number of workers; defaults to `cpu_count()`
                            (or 1 if that is not implemented)
        initializer      -- optional callable run in every worker as
                            `initializer(*initargs)` before it takes tasks
        maxtasksperchild -- passed to each worker as its `maxtasks` limit

        Raises ValueError if `processes` is less than 1 and TypeError if
        `initializer` is not callable.
        '''
        # Validate the arguments before allocating queues or starting
        # anything, so that a bad argument does not leave a half-built
        # pool holding open pipes.
        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            raise ValueError("Number of processes must be at least 1")

        if initializer is not None and not callable(initializer):
            raise TypeError('initializer must be a callable')

        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        self._processes = processes
        self._pool = []
        self._repopulate_pool()

        self._worker_handler = threading.Thread(
            target=Pool._handle_workers,
            args=(self, )
            )
        self._worker_handler.daemon = True
        self._worker_handler._state = RUN
        self._worker_handler.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._worker_handler, self._task_handler,
                  self._result_handler, self._cache),
            exitpriority=15
            )

    def _join_exited_workers(self):
        """Cleanup after any worker processes which have exited due to reaching
        their specified lifetime.  Returns True if any workers were cleaned up.
        """
        cleaned = False
        # Walk the indices backwards so deleting an entry does not shift
        # the positions still to be visited.
        for i in reversed(range(len(self._pool))):
            worker = self._pool[i]
            if worker.exitcode is not None:
                # worker exited
                debug('cleaning up worker %d' % i)
                worker.join()
                cleaned = True
                del self._pool[i]
        return cleaned

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker,
                             args=(self._inqueue, self._outqueue,
                                   self._initializer,
                                   self._initargs, self._maxtasksperchild)
                            )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            debug('added worker')

    def _maintain_pool(self):
        """Clean up any exited workers and start replacements for them.
        """
        if self._join_exited_workers():
            self._repopulate_pool()

    def _setup_queues(self):
        '''
        Create the task and result queues and cache their raw send/recv
        methods for use by the handler threads.
        '''
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Equivalent of `map()` builtin
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten back to single items.
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten back to single items.
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Default: about four chunks per worker, rounded up.
            chunksize, extra = divmod(len(iterable), len(self._pool) * 4)
            if extra:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_workers(pool):
        '''
        Body of the worker handler thread: keep the pool populated while
        both the thread and the pool are in the RUN state.
        '''
        while pool._worker_handler._state == RUN and pool._state == RUN:
            pool._maintain_pool()
            time.sleep(0.1)
        # send sentinel to stop workers
        pool._taskqueue.put(None)
        debug('worker handler exiting')

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        '''
        Body of the task handler thread: forward every task of every queued
        task sequence to the workers via `put`, then send the sentinels.
        '''
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # The whole sequence was sent: report how many tasks it
                # contained, then carry on with the next sequence.
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # The inner loop was broken out of, so stop handling tasks.
            break
        else:
            debug('task handler got sentinel')

        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        '''
        Body of the result handler thread: pass each `(job, i, obj)` read
        via `get` to `cache[job]._set(i, obj)` until told to stop.
        '''
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                # No result object is registered for this job any more.
                pass

        # After the sentinel, keep delivering results until every
        # outstanding job has removed itself from the cache.
        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        '''
        Generate `(func, chunk)` pairs, where each chunk is a tuple of up
        to `size` consecutive items taken from `it`.
        '''
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Move a running pool (and its worker handler) to the CLOSE state.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._worker_handler._state = CLOSE

    def terminate(self):
        '''
        Mark the pool as terminated and run the pool finalizer.
        '''
        debug('terminating pool')
        self._state = TERMINATE
        self._worker_handler._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for the handler threads and the workers to finish; the pool
        must already be closed or terminated.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._worker_handler.join()
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is acquired here and never released
        # in this method -- presumably deliberate, to stop workers taking
        # further tasks during termination; confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        # this is guaranteed to only be called once
        debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        # We must wait for the worker handler to exit before terminating
        # workers because we don't want workers to be restarted behind our back.
        debug('joining worker handler')
        worker_handler.join()

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    debug('cleaning up worker %d' % p.pid)
                    p.join()
Benjamin Peterson7f03ea72008-06-13 19:20:48 +0000487
488#
489# Class whose instances are returned by `Pool.apply_async()`
490#
491
class ApplyResult(object):
    '''
    Handle for the pending result of a single asynchronous call.

    On creation the object registers itself in `cache` under a new job id
    and removes itself again once the result has been delivered.
    '''

    def __init__(self, cache, callback):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        '''Return whether the result has been delivered.'''
        return self._ready

    def successful(self):
        '''Return whether the call succeeded; the result must be ready.'''
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        '''Block until the result is delivered or `timeout` elapses.'''
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the value of the call, re-raising the exception it failed
        with.  Raises TimeoutError if no result arrives within `timeout`.
        '''
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if not self._success:
            raise self._value
        return self._value

    def _set(self, i, obj):
        '''Deliver the `(success, value)` pair `obj`; `i` is not used.'''
        self._success, self._value = obj
        # The callback is only invoked for successful calls.
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            self._cond.notify()
        del self._cache[self._job]
537
538#
539# Class whose instances are returned by `Pool.map_async()`
540#
541
class MapResult(ApplyResult):
    '''
    Handle for the pending result of a chunked map.

    `_value` is a list of `length` slots filled in chunk by chunk; the
    result becomes ready when every chunk has arrived or as soon as one
    chunk reports a failure.
    '''

    def __init__(self, cache, chunksize, length, callback):
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            # Nothing will ever be delivered for this job, so it is ready
            # immediately.  It must also leave the cache: the result
            # handler keeps running while the cache is non-empty, so a
            # stale entry would make `Pool.join()` hang.
            self._number_left = 0
            self._ready = True
            del cache[self._job]
        else:
            self._number_left = length//chunksize + bool(length % chunksize)

    def _mark_ready(self):
        '''Flag the result as ready and wake a thread waiting on it.'''
        self._cond.acquire()
        try:
            self._ready = True
            self._cond.notify()
        finally:
            self._cond.release()

    def _set(self, i, success_result):
        '''
        Deliver the `(success, result)` pair for chunk number `i`.

        A successful chunk is stored in its slice of `_value`; once the
        last chunk has arrived the callback (if any) is invoked with the
        complete list.  A failed chunk makes the whole map fail with that
        chunk's exception.
        '''
        success, result = success_result
        if success:
            self._value[i*self._chunksize:(i+1)*self._chunksize] = result
            self._number_left -= 1
            if self._number_left == 0:
                if self._callback:
                    self._callback(self._value)
                del self._cache[self._job]
                self._mark_ready()
        else:
            self._success = False
            self._value = result
            del self._cache[self._job]
            self._mark_ready()
581
582#
583# Class whose instances are returned by `Pool.imap()`
584#
585
class IMapIterator(object):
    '''
    Iterator over the results of a job, yielded in task order.

    Results that arrive ahead of their turn are parked in `_unsorted`
    until all earlier ones have been queued in `_items`.
    '''

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()
        self._index = 0
        self._length = None
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        '''
        Return the next result, re-raising the exception of a failed task.

        Waits at most once, for up to `timeout`, when no result is queued.
        Raises StopIteration when all results have been consumed and
        TimeoutError when nothing is available after the wait.
        '''
        with self._cond:
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            item = self._items.popleft()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        '''Deliver the `(success, value)` pair `obj` for task number `i`.'''
        with self._cond:
            if i != self._index:
                # Arrived early: hold it back until its turn comes.
                self._unsorted[i] = obj
            else:
                self._items.append(obj)
                self._index += 1
                # Release any parked results that are now in sequence.
                while self._index in self._unsorted:
                    self._items.append(self._unsorted.pop(self._index))
                    self._index += 1
                self._cond.notify()

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        '''Record the total number of tasks making up the job.'''
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
654
655#
656# Class whose instances are returned by `Pool.imap_unordered()`
657#
658
class IMapUnorderedIterator(IMapIterator):
    '''
    Variant of `IMapIterator` that yields results in arrival order.
    '''

    def _set(self, i, obj):
        '''Queue `obj` straight away; the task number `i` is ignored.'''
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            if self._index == self._length:
                del self._cache[self._job]
671
672#
673#
674#
675
class ThreadPool(Pool):
    '''
    Pool variant whose workers are built from the `Process` class of the
    sibling `dummy` module and which uses `Queue.Queue` for both queues.
    '''

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        '''Create the task and result queues as in-process `Queue.Queue`s.'''
        inqueue = Queue.Queue()
        outqueue = Queue.Queue()
        self._inqueue = inqueue
        self._outqueue = outqueue
        self._quick_put = inqueue.put
        self._quick_get = outqueue.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            inqueue.queue.clear()
            inqueue.queue.extend([None] * size)
            inqueue.not_empty.notify_all()