#
# Module providing the `Pool` class for managing a process pool
#
# multiprocessing/pool.py
#
# Copyright (c) 2007-2008, R Oudkerk --- see COPYING.txt
#
8
9__all__ = ['Pool']
10
11#
12# Imports
13#
14
15import threading
16import queue
17import itertools
18import collections
19import time
20
21from multiprocessing import Process, cpu_count, TimeoutError
22from multiprocessing.util import Finalize, debug
23
24#
25# Constants representing the state of a pool
26#
27
# Pool life-cycle states.  RUN must stay 0 (falsy): the handler threads test
# `thread._state` for truthiness to detect that the pool has left RUN.
RUN = 0         # submission methods assert the pool is in this state
CLOSE = 1       # set by Pool.close()
TERMINATE = 2   # set by Pool.terminate() and on the handler threads
31
32#
33# Miscellaneous
34#
35
# Source of job ids: every result object takes `next(job_counter)` as its
# `_job` and registers itself in the pool cache under that key.
job_counter = itertools.count()
37
def mapstar(args):
    '''
    Call `map(*args)` and return the results as a list.

    `args` is a sequence whose first item is the function and whose
    remaining items are the iterables handed to `map()`.
    '''
    return [*map(*args)]
40
41#
42# Code run by worker processes
43#
44
def worker(inqueue, outqueue, initializer=None, initargs=()):
    '''
    Main loop of a pool worker.

    Reads `(job, i, func, args, kwds)` tasks from `inqueue`, runs
    `func(*args, **kwds)` and writes `(job, i, (success, value))` to
    `outqueue`, where `value` is the return value or the exception that
    was caught.  The loop ends when `None` is read or when reading the
    queue raises EOFError/IOError.

    If `initializer` is not None, `initializer(*initargs)` is called once
    before the first task is read.
    '''
    send_result = outqueue.put
    fetch_task = inqueue.get
    if hasattr(inqueue, '_writer'):
        # Pipe-backed queues: close the ends this worker does not use.
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    while True:
        try:
            task = fetch_task()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            return

        if task is None:
            debug('worker got sentinel -- exiting')
            return

        job, index, func, args, kwds = task
        try:
            outcome = (True, func(*args, **kwds))
        except Exception as exc:
            # Ship the exception back instead of letting the worker die.
            outcome = (False, exc)
        send_result((job, index, outcome))
72
73#
74# Class representing a process pool
75#
76
class Pool(object):
    '''
    Class which supports an async version of applying functions to arguments.

    Submitted work is placed on an internal task queue.  A task-handler
    thread forwards it to the workers through `_inqueue`; a result-handler
    thread reads `_outqueue` and passes each result to the result object
    registered under the job id in `_cache`.
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        '''
        Start the workers and the two handler threads.

        `processes` is the number of workers; when None, `cpu_count()` is
        used, falling back to 1 if that raises NotImplementedError.
        `initializer(*initargs)`, if given, is run by every worker before
        it reads its first task.

        Raises TypeError if `initializer` is neither None nor callable and
        ValueError if `processes` is smaller than 1.
        '''
        # Validate before creating any queue, thread or process so that a
        # bad argument cannot leave half-built resources behind.
        if initializer is not None and not callable(initializer):
            raise TypeError('initializer must be a callable')

        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            # A pool without workers would never complete a task, and
            # map_async() would divide by zero when choosing a chunksize.
            raise ValueError('Number of processes must be at least 1')

        self._setup_queues()
        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN

        self._pool = []
        for i in range(processes):
            w = self.Process(
                target=worker,
                args=(self._inqueue, self._outqueue, initializer, initargs)
                )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # Calling self._terminate() runs _terminate_pool() with these args.
        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._task_handler, self._result_handler, self._cache),
            exitpriority=15
            )

    def _setup_queues(self):
        '''
        Create the task and result queues and the raw send/recv shortcuts
        used by the handler threads.
        '''
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `func(*args, **kwds)`.

        Blocks until the result is available.
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Apply `func` to each element in `iterable`, collecting the results
        in a list that is returned.
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.

        Returns an iterator over the results in input order.  With
        `chunksize` greater than 1 the input is sent to the workers in
        batches of that size and the returned generator flattens them.
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary.
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous version of `apply()` method.

        Returns an `ApplyResult`.  `callback`, if given, is passed to the
        result object, which calls it with the value on success.
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous version of `map()` method.

        Returns a `MapResult`.  An iterable without `__len__` is first
        copied into a list.  When `chunksize` is None it is chosen so that
        the input is split into roughly four batches per worker.
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            chunksize, extra = divmod(len(iterable), len(self._pool) * 4)
            if extra:
                chunksize += 1

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        '''
        Body of the task-handler thread.

        Takes `(taskseq, set_length)` pairs from `taskqueue` until `None`
        is read, and sends every task of `taskseq` with `put`.  Afterwards
        one `None` is put on `outqueue` and one `None` per pool member is
        sent with `put`.
        '''
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # The whole sequence was sent: report how many tasks it
                # held (i stays -1 for an empty sequence) and go on.
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # The inner loop was interrupted: stop handling tasks.
            break
        else:
            debug('task handler got sentinel')

        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        '''
        Body of the result-handler thread.

        Reads `(job, i, obj)` items with `get` and passes them to
        `cache[job]._set(i, obj)`; items for jobs no longer in `cache` are
        dropped.  After the first `None` it keeps reading until `cache` is
        empty or the thread state is TERMINATE.
        '''
        thread = threading.current_thread()

        while True:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        # Results for jobs that are still outstanding may yet arrive.
        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        '''
        Yield `(func, batch)` pairs where `batch` is a tuple of up to
        `size` consecutive items of `it`; stops at the first empty batch.
        '''
        it = iter(it)
        while True:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        '''Refuse pickling: always raises NotImplementedError.'''
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Stop accepting work: move from RUN to CLOSE and queue the sentinel
        for the task handler.  Has no effect in any other state.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._taskqueue.put(None)

    def terminate(self):
        '''Set the state to TERMINATE and run the pool finalizer.'''
        debug('terminating pool')
        self._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for both handler threads and every worker to finish.
        `close()` or `terminate()` must have been called first.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        '''
        Drain `inqueue` while the task handler is alive.  `size` is not
        used here; it is part of the signature shared with subclasses.
        '''
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is acquired and never released in
        # this function -- presumably so that workers cannot take further
        # tasks during termination; confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        task_handler, result_handler, cache):
        '''
        Shut the pool down: flag both handler threads as TERMINATE, send
        them sentinels, terminate the workers when they support it, then
        join the handlers and the workers.
        '''
        # this is guaranteed to only be called once
        debug('finalizing pool')

        task_handler._state = TERMINATE
        taskqueue.put(None)                 # sentinel

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                p.terminate()

        # Plain join(): a timeout of 1e100 exceeds threading.TIMEOUT_MAX
        # and raises OverflowError on Python 3.  A thread cannot join
        # itself, so skip the join when running inside that handler.
        debug('joining task handler')
        if threading.current_thread() is not task_handler:
            task_handler.join()

        debug('joining result handler')
        if threading.current_thread() is not result_handler:
            result_handler.join()

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                p.join()
386
387#
388# Class whose instances are returned by `Pool.apply_async()`
389#
390
class ApplyResult(object):
    '''
    Handle for the outcome of a single asynchronous call.

    The object registers itself in `cache` under a fresh job id and removes
    itself again once `_set()` has delivered the outcome.
    '''

    def __init__(self, cache, callback):
        '''
        `cache` is the mapping of job ids to pending result objects.
        `callback`, if true, is called with the value of a successful call.
        '''
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        '''Return whether the outcome has been delivered.'''
        return self._ready

    def successful(self):
        '''
        Return whether the call completed without raising.  The result
        must be ready.
        '''
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        '''
        Block until the outcome is delivered or `timeout` seconds pass.
        '''
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the value of the call, or raise the exception it raised.
        Raises TimeoutError if nothing arrives within `timeout` seconds.
        '''
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if self._success:
            return self._value
        else:
            raise self._value

    def _set(self, i, obj):
        '''
        Deliver the outcome `obj`, a `(success, value)` pair.  `i` is not
        used by this class.
        '''
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            # Several threads may be blocked in wait()/get() on the same
            # result; notify() would release only one of them.
            self._cond.notify_all()
        del self._cache[self._job]
436
437#
438# Class whose instances are returned by `Pool.map_async()`
439#
440
class MapResult(ApplyResult):
    '''
    Handle for the outcome of an asynchronous map, assembled from the
    per-chunk results delivered through `_set()`.
    '''

    def __init__(self, cache, chunksize, length, callback):
        '''
        `length` is the number of input items and `chunksize` the number of
        items per task.  A `chunksize` of zero or less means there is
        nothing to wait for: the result is ready at once with
        `[None] * length` as its value.
        '''
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            self._number_left = 0
            self._ready = True
            # No chunk will ever arrive to trigger the removal in _set(),
            # and a stale entry keeps the result handler's drain loop
            # (`while cache ...`) running for ever after close().
            del cache[self._job]
        else:
            self._number_left = length//chunksize + bool(length % chunksize)

    def _set(self, i, success_result):
        '''
        Store the outcome of chunk number `i`, a `(success, result)` pair.

        A successful chunk fills its slice of the value list; the result
        becomes ready when the last chunk has arrived.  A failed chunk
        makes the whole result fail at once with that exception.
        '''
        success, result = success_result
        if success:
            self._value[i*self._chunksize:(i+1)*self._chunksize] = result
            self._number_left -= 1
            if self._number_left == 0:
                if self._callback:
                    self._callback(self._value)
                self._finish()
        else:
            self._success = False
            self._value = result
            self._finish()

    def _finish(self):
        '''Unregister from the cache, mark ready and wake every waiter.'''
        del self._cache[self._job]
        with self._cond:
            self._ready = True
            # notify_all(): more than one thread may be waiting.
            self._cond.notify_all()
480
481#
482# Class whose instances are returned by `Pool.imap()`
483#
484
class IMapIterator(object):
    '''
    Iterator over the results of one job, yielded in index order.

    Results arriving ahead of their turn are parked in `_unsorted` until
    all earlier indices have been delivered.
    '''

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()   # results ready to be returned
        self._index = 0                     # next index to append to _items
        self._length = None                 # total count, once known
        self._unsorted = {}                 # index -> result that came early
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        '''
        Return the next result, or raise it if the call failed.

        Raises StopIteration when all results have been returned, and
        TimeoutError if a wait of up to `timeout` seconds produced nothing.
        '''
        with self._cond:
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            success, value = self._items.popleft()

        if not success:
            raise value
        return value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        '''Accept result `obj` for index `i`.'''
        with self._cond:
            if i != self._index:
                # Out of turn: keep it until its predecessors are in.
                self._unsorted[i] = obj
            else:
                self._items.append(obj)
                self._index += 1
                # Release any parked results that are now contiguous.
                while self._index in self._unsorted:
                    self._items.append(self._unsorted.pop(self._index))
                    self._index += 1
                self._cond.notify()

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        '''Record the total number of results to expect.'''
        with self._cond:
            self._length = length
            if self._length == self._index:
                self._cond.notify()
                del self._cache[self._job]
553
554#
555# Class whose instances are returned by `Pool.imap_unordered()`
556#
557
class IMapUnorderedIterator(IMapIterator):
    '''
    Variant of `IMapIterator` that hands results out in arrival order.
    '''

    def _set(self, i, obj):
        '''Queue `obj` immediately; the index `i` is ignored.'''
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            if self._index == self._length:
                del self._cache[self._job]
570
#
# Variant of `Pool` whose workers and queues come from `.dummy` and `queue`
#
574
class ThreadPool(Pool):
    '''
    `Pool` variant that takes its `Process` class from `.dummy` and uses
    `queue.Queue` objects for the task and result queues.
    '''

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        '''Create the two queues and bind their put/get methods.'''
        tasks = queue.Queue()
        results = queue.Queue()
        self._inqueue = tasks
        self._outqueue = results
        self._quick_put = tasks.put
        self._quick_get = results.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        '''
        Replace whatever is queued on `inqueue` with `size` sentinels and
        wake all threads waiting for an item.
        '''
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend([None] * size)
            inqueue.not_empty.notify_all()