blob: bc7e8f1712e86d420f1412cb39b5d1c41a66a478 [file] [log] [blame]
Benjamin Peterson7f03ea72008-06-13 19:20:48 +00001#
2# Module providing the `Pool` class for managing a process pool
3#
4# multiprocessing/pool.py
5#
6# Copyright (c) 2007-2008, R Oudkerk --- see COPYING.txt
7#
8
9__all__ = ['Pool']
10
11#
12# Imports
13#
14
15import threading
16import Queue
17import itertools
18import collections
19import time
20
21from multiprocessing import Process, cpu_count, TimeoutError
22from multiprocessing.util import Finalize, debug
23
24#
25# Constants representing the state of a pool
26#
27
# Lifecycle states of a pool (also stored on its handler threads):
# RUN -> accepting work, CLOSE -> no new work, TERMINATE -> shutting down now.
RUN, CLOSE, TERMINATE = 0, 1, 2
31
32#
33# Miscellaneous
34#
35
# Monotonic source of job ids shared by every result object in this module.
job_counter = itertools.count()


def mapstar(args):
    '''
    Apply `map()` to a packed `(func, iterable, ...)` argument tuple.

    Used as the task function for chunked work, where a whole batch is
    shipped to a worker as a single argument.
    '''
    func_and_iterables = tuple(args)
    return map(*func_and_iterables)
40
41#
42# Code run by worker processes
43#
44
def worker(inqueue, outqueue, initializer=None, initargs=()):
    '''
    Main loop executed by each pool worker.

    Reads `(job, i, func, args, kwds)` tasks from `inqueue`, calls
    `func(*args, **kwds)` and writes `(job, i, (success, value))` to
    `outqueue`, where `value` is the return value on success or the raised
    exception on failure.  The loop stops on a `None` sentinel or when
    reading from `inqueue` raises `EOFError`/`IOError`.

    If `initializer` is not None it is called once, as
    `initializer(*initargs)`, before the first task is read.
    '''
    send_result = outqueue.put
    fetch_task = inqueue.get

    # Pipe-backed queues: drop the two ends this function never touches
    # (it only reads from inqueue and only writes to outqueue).
    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    while True:
        try:
            task = fetch_task()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            return

        if task is None:
            debug('worker got sentinel -- exiting')
            return

        job, i, func, args, kwds = task
        try:
            outcome = (True, func(*args, **kwds))
        except Exception as e:
            # Ship the exception back instead of killing the worker.
            outcome = (False, e)
        send_result((job, i, outcome))
72
73#
74# Class representing a process pool
75#
76
class Pool(object):
    '''
    Class which supports an async version of the `apply()` builtin
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        '''
        Start `processes` workers plus the task and result handler threads.

        `processes` defaults to `cpu_count()` (or 1 when that is not
        implemented).  `initializer`, when given, is passed to every worker
        together with `initargs`.

        Raises ValueError if `processes` is less than 1 and TypeError if
        `initializer` is neither None nor callable.
        '''
        # Validate everything first so that a bad argument does not leave
        # freshly created queues behind.
        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            # An empty pool can never run a task, and map_async() would
            # divide by len(self._pool) * 4 == 0.
            raise ValueError('Number of processes must be at least 1')

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN

        self._pool = []
        for _ in range(processes):
            w = self.Process(
                target=worker,
                args=(self._inqueue, self._outqueue, initializer, initargs)
                )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()

        # Thread which moves tasks from self._taskqueue to the workers.
        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        # Thread which routes worker results to the objects in self._cache.
        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._task_handler, self._result_handler, self._cache),
            exitpriority=15
            )

    def _setup_queues(self):
        '''
        Create the worker input/output queues and the raw put/get callables
        used by the handler threads.
        '''
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Equivalent of `map()` builtin
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        return self._submit_imap(IMapIterator, func, iterable, chunksize)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        return self._submit_imap(IMapUnorderedIterator, func, iterable,
                                 chunksize)

    def _submit_imap(self, result_class, func, iterable, chunksize):
        '''
        Shared implementation of `imap()` and `imap_unordered()`.

        `result_class` is the iterator type registered in the cache.  With
        `chunksize == 1` that iterator is returned directly; otherwise the
        work is sent in batches run through `mapstar` and a generator which
        flattens the per-batch results is returned.
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = result_class(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result

        # Checked before the result object is created, so a bad chunksize
        # leaves nothing registered in the cache.
        assert chunksize > 1
        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = result_class(self._cache)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
        return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Aim for roughly four chunks per worker, rounding up.
            chunksize, extra = divmod(len(iterable), len(self._pool) * 4)
            if extra:
                chunksize += 1

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        '''
        Body of the task handler thread.

        Takes `(taskseq, set_length)` pairs from `taskqueue` until a `None`
        sentinel arrives and forwards every task with `put`.  On the way
        out it sends one sentinel to the result handler and one per worker.
        '''
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # Whole sequence was sent: report how many tasks it held.
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # Inner loop was interrupted -- stop handling tasks altogether.
            break
        else:
            debug('task handler got sentinel')

        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        '''
        Body of the result handler thread.

        Reads `(job, i, obj)` triples with `get` and passes them to
        `cache[job]._set(i, obj)`; results for jobs no longer in the cache
        are dropped.  After the first sentinel it keeps going until the
        cache is empty or the thread state becomes TERMINATE.
        '''
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        # Sentinel seen: drain the results of the jobs still outstanding.
        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        '''
        Yield `(func, chunk)` pairs where each chunk is a tuple of up to
        `size` consecutive items from `it`; stops at the first empty chunk.
        '''
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Stop accepting work: move from RUN to CLOSE and send the task
        handler its sentinel.  Does nothing in any other state.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._taskqueue.put(None)

    def terminate(self):
        '''
        Mark the pool as TERMINATE and run the finalizer.
        '''
        debug('terminating pool')
        self._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for both handler threads and every worker to finish.  Only
        valid after `close()` or `terminate()`.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is taken and never released here --
        # presumably on purpose, to keep workers from reading further
        # tasks during shutdown; confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        task_handler, result_handler, cache):
        '''
        Finalizer callback: stop both handler threads, terminate the
        workers (when they support it) and join everything.
        '''
        # this is guaranteed to only be called once
        debug('finalizing pool')

        task_handler._state = TERMINATE
        taskqueue.put(None)                 # sentinel

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                p.join()
388
389#
390# Class whose instances are returned by `Pool.apply_async()`
391#
392
class ApplyResult(object):
    '''
    Handle for the outcome of a single asynchronous call.

    The instance registers itself in `cache` under a fresh job id and
    removes itself again once its result has been delivered via `_set()`.
    '''

    def __init__(self, cache, callback):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        '''Return whether the result has been delivered.'''
        return self._ready

    def successful(self):
        '''Return whether the call completed without raising; the result
        must already be ready.'''
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        '''Block until the result is delivered or `timeout` elapses.'''
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the call's value, re-raising the exception it raised.

        Raises TimeoutError if no result arrived within `timeout`.
        '''
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if not self._success:
            raise self._value
        return self._value

    def _set(self, i, obj):
        # obj is the (success, value) pair produced for this job; the
        # callback only runs for successful calls, before waiters wake up.
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            self._cond.notify()
        del self._cache[self._job]
438
439#
440# Class whose instances are returned by `Pool.map_async()`
441#
442
class MapResult(ApplyResult):
    '''
    Result of a chunked map: collects per-chunk results into one list.

    `chunksize` is the number of items per chunk and `length` the total
    number of items.  The result is ready once every chunk has reported
    success, or as soon as one chunk reports failure.
    '''

    def __init__(self, cache, chunksize, length, callback):
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            # Nothing will ever be reported for this job, so it is ready
            # immediately.  It must also leave the cache right away: the
            # result handler keeps running while the cache is non-empty,
            # so a stale entry would make Pool.join() block forever.
            self._number_left = 0
            self._ready = True
            del cache[self._job]
        else:
            self._number_left = length//chunksize + bool(length % chunksize)

    def _set(self, i, success_result):
        # i is the chunk index; success_result is (success, result) where
        # result is the chunk's list of values or the exception raised.
        success, result = success_result
        if success:
            start = i * self._chunksize
            self._value[start:start + self._chunksize] = result
            self._number_left -= 1
            if self._number_left != 0:
                return
            if self._callback:
                self._callback(self._value)
        else:
            # First failure wins: the exception replaces the value list.
            self._success = False
            self._value = result

        del self._cache[self._job]
        with self._cond:
            self._ready = True
            self._cond.notify()
482
483#
484# Class whose instances are returned by `Pool.imap()`
485#
486
class IMapIterator(object):
    '''
    Iterator which yields results in task order as they become available.

    Results arriving ahead of their turn are parked in `_unsorted` until
    all earlier ones have been queued in `_items`.
    '''

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()
        self._index = 0
        self._length = None
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        '''
        Return the next value, re-raising the exception if the task failed.

        Waits at most once, for up to `timeout`, when nothing is queued.
        Raises StopIteration once every result has been consumed and
        TimeoutError when the wait ends with nothing queued.
        '''
        with self._cond:
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            item = self._items.popleft()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        # Record result obj for task number i.
        with self._cond:
            if self._index == i:
                self._items.append(obj)
                self._index += 1
                # Release any parked results that are now in sequence.
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.append(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        # Called once the total number of tasks is known.
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
555
556#
557# Class whose instances are returned by `Pool.imap_unordered()`
558#
559
class IMapUnorderedIterator(IMapIterator):
    '''
    Variant of `IMapIterator` which hands results out in arrival order.
    '''

    def _set(self, i, obj):
        # The task index i is ignored: every result is queued immediately.
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            if self._index == self._length:
                del self._cache[self._job]
572
573#
574#
575#
576
class ThreadPool(Pool):
    '''
    Pool variant whose workers come from the `.dummy` module's `Process`
    and whose queues are ordinary `Queue.Queue` objects.
    '''

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        inqueue = Queue.Queue()
        outqueue = Queue.Queue()
        self._inqueue = inqueue
        self._outqueue = outqueue
        self._quick_put = inqueue.put
        self._quick_get = outqueue.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend(None for _ in range(size))
            inqueue.not_empty.notify_all()