blob: 3489aea284164ddd9b0e02c559ed1a5eee950fc4 [file] [log] [blame]
Benjamin Petersone711caf2008-06-11 16:44:04 +00001#
2# Module providing the `Pool` class for managing a process pool
3#
4# multiprocessing/pool.py
5#
6# Copyright (c) 2007-2008, R Oudkerk --- see COPYING.txt
7#
8
# Names exported by `from <this module> import *`; only `Pool` is listed.
__all__ = ['Pool']
10
11#
12# Imports
13#
14
15import threading
16import queue
17import itertools
18import collections
19import time
20
21from multiprocessing import Process, cpu_count, TimeoutError
22from multiprocessing.util import Finalize, debug
23
24#
25# Constants representing the state of a pool
26#
27
RUN = 0         # initial state set by Pool.__init__; also the falsy value the handler threads test for
CLOSE = 1       # set by Pool.close()
TERMINATE = 2   # set by Pool.terminate() and, on the handler threads, by Pool._terminate_pool()
31
32#
33# Miscellaneous
34#
35
# Module-wide source of job ids: each result object takes `next(job_counter)`
# as its `_job` and registers itself in the pool cache under that key.
job_counter = itertools.count()
37
def mapstar(args):
    '''
    Unpack `args` as the positional arguments of `map()` and return the
    mapped values as a list.
    '''
    mapped = map(*args)
    return list(mapped)
40
41#
42# Code run by worker processes
43#
44
def worker(inqueue, outqueue, initializer=None, initargs=()):
    '''
    Main loop run by each pool worker.

    Reads `(job, i, func, args, kwds)` tuples from `inqueue`, calls
    `func(*args, **kwds)` and writes `(job, i, (success, value))` to
    `outqueue`, where `value` is the return value on success or the
    exception instance that was raised.  The loop ends when `None` is read
    or when reading raises EOFError/IOError.

    If `initializer` is not None it is called once as
    `initializer(*initargs)` before the first task is read.
    '''
    send_result = outqueue.put
    fetch_task = inqueue.get

    # Pipe-backed queues: close the ends this worker does not use.
    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    while True:
        try:
            task = fetch_task()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            return

        if task is None:
            debug('worker got sentinel -- exiting')
            return

        job, index, func, args, kwds = task
        try:
            outcome = (True, func(*args, **kwds))
        except Exception as exc:
            outcome = (False, exc)
        send_result((job, index, outcome))
72
73#
74# Class representing a process pool
75#
76
class Pool(object):
    '''
    Class which supports an async version of applying functions to arguments.

    Submitted work is placed on an internal `queue.Queue`.  A task handler
    thread forwards it to the workers, and a result handler thread passes
    each worker reply to the result object registered in `self._cache`
    under the reply's job id.
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        '''
        Start the workers and the two handler threads.

        `processes` is the number of workers; when None, `cpu_count()` is
        used, falling back to 1 if that raises NotImplementedError.
        `initializer`, if not None, is called in every worker as
        `initializer(*initargs)`.

        Raises ValueError if `processes` is less than 1 and TypeError if
        `initializer` is neither None nor callable.
        '''
        # Validate everything before any queue, worker or thread exists, so
        # a rejected call leaves nothing behind.
        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            # With no workers every submitted task would wait forever and
            # map_async() would divide by zero when choosing a chunksize.
            raise ValueError('Number of processes must be at least 1')

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._setup_queues()
        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN

        self._pool = []
        for i in range(processes):
            w = self.Process(
                target=worker,
                args=(self._inqueue, self._outqueue, initializer, initargs)
                )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # Calling self._terminate() runs _terminate_pool() with these args.
        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._task_handler, self._result_handler, self._cache),
            exitpriority=15
            )

    def _setup_queues(self):
        '''
        Create the worker input/output queues and the raw send/receive
        callables used by the handler threads.
        '''
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `func(*args, **kwds)`.

        Blocks until the result is available.
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Apply `func` to each element in `iterable`, collecting the results
        in a list that is returned.
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.

        Returns an iterator over the results in input order.  With
        `chunksize` greater than 1 the input is submitted in batches of
        that many items and the batches are flattened again on output.
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary.
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous version of `apply()` method.

        Returns an `ApplyResult`; `callback`, if given, is passed to it.
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous version of `map()` method.

        Returns a `MapResult`.  An iterable without `__len__` is first
        copied into a list.  When `chunksize` is None it is chosen so that
        the input splits into about four chunks per worker (0 for an empty
        input).
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            chunksize, extra = divmod(len(iterable), len(self._pool) * 4)
            if extra:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        '''
        Body of the task handler thread.

        Takes `(taskseq, set_length)` pairs from `taskqueue` until a None
        sentinel arrives, sending every task in `taskseq` with `put`.  On
        exit it puts a None sentinel on `outqueue` and one None per worker
        through `put`.
        '''
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # whole sequence was sent: report how many tasks it held
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # inner loop was cut short: stop taking task sequences
            break
        else:
            debug('task handler got sentinel')


        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        '''
        Body of the result handler thread.

        Reads `(job, i, obj)` replies with `get` and calls
        `cache[job]._set(i, obj)`; replies for jobs no longer in `cache`
        are dropped.  After the first None sentinel it keeps reading until
        `cache` is empty or the thread state becomes TERMINATE.
        '''
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        '''
        Yield `(func, chunk)` pairs where each `chunk` is a tuple of up to
        `size` consecutive items of `it`; stops at the first empty chunk.
        '''
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        '''Always raises NotImplementedError: pools cannot be pickled.'''
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Move a running pool to the CLOSE state and queue the None sentinel
        for the task handler.  Does nothing if the pool is not running.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._taskqueue.put(None)

    def terminate(self):
        '''Set the state to TERMINATE and run the pool finalizer.'''
        debug('terminating pool')
        self._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for both handler threads and every worker to finish.
        `close()` or `terminate()` must have been called first.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        '''
        Discard items readable from `inqueue` for as long as the task
        handler thread is alive.  `size` is not used by this implementation.
        '''
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is taken and not released in this
        # function -- presumably deliberate, to keep workers from reading
        # further tasks during termination; confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        task_handler, result_handler, cache):
        '''
        Finalizer callback: stop both handler threads, terminate the
        workers (when they have a `terminate` method) and join everything.
        '''
        # this is guaranteed to only be called once
        debug('finalizing pool')

        task_handler._state = TERMINATE
        taskqueue.put(None)                 # sentinel

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                p.terminate()

        # Join without a timeout: a huge finite value such as 1e100 exceeds
        # threading.TIMEOUT_MAX and raises OverflowError.  The current
        # thread is skipped because joining it raises RuntimeError.
        debug('joining task handler')
        if threading.current_thread() is not task_handler:
            task_handler.join()

        debug('joining result handler')
        if threading.current_thread() is not result_handler:
            result_handler.join()

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    debug('cleaning up worker %d' % p.pid)
                    p.join()
Benjamin Petersone711caf2008-06-11 16:44:04 +0000394
395#
396# Class whose instances are returned by `Pool.apply_async()`
397#
398
class ApplyResult(object):
    '''
    Handle for the outcome of one asynchronous call.

    On creation the object takes the next job id and stores itself in
    `cache` under that id; `_set()` removes it again once the outcome
    has been recorded.
    '''

    def __init__(self, cache, callback):
        self._cache = cache
        self._callback = callback
        self._ready = False
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        cache[self._job] = self

    def ready(self):
        '''Return whether an outcome has been recorded.'''
        return self._ready

    def successful(self):
        '''Return whether the call succeeded; asserts the result is ready.'''
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        '''Block until the result is ready or `timeout` seconds elapse.'''
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the recorded value, or raise it if the call failed.
        Raises TimeoutError if no outcome is recorded within `timeout`.
        '''
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if not self._success:
            raise self._value
        return self._value

    def _set(self, i, obj):
        '''
        Record `obj`, a `(success, value)` pair; `i` is ignored.  The
        callback, if any, runs only on success and before waiters are woken.
        '''
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            self._cond.notify()
        del self._cache[self._job]
444
445#
446# Class whose instances are returned by `Pool.map_async()`
447#
448
class MapResult(ApplyResult):
    '''
    Result of a chunked map: collects the per-chunk results into one list
    of `length` items, or records the first failure reported.
    '''

    def __init__(self, cache, chunksize, length, callback):
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            self._number_left = 0
            self._ready = True
            # No chunk will ever be reported for this job, so _set() never
            # runs to drop the cache entry.  Remove it here; otherwise the
            # cache never becomes empty.
            del cache[self._job]
        else:
            # number of chunks, rounding up for a partial last chunk
            self._number_left = length//chunksize + bool(length % chunksize)

    def _set(self, i, success_result):
        '''
        Record the outcome of chunk `i`, given as `(success, result)`.

        A successful chunk is stored at its slice of the value list; when
        the last chunk arrives the callback (if any) is called with the full
        list, the cache entry is removed and waiters are woken.  A failed
        chunk makes the whole result unsuccessful with `result` as its value.
        '''
        success, result = success_result
        if success:
            self._value[i*self._chunksize:(i+1)*self._chunksize] = result
            self._number_left -= 1
            if self._number_left == 0:
                if self._callback:
                    self._callback(self._value)
                del self._cache[self._job]
                with self._cond:
                    self._ready = True
                    self._cond.notify()

        else:
            self._success = False
            self._value = result
            del self._cache[self._job]
            with self._cond:
                self._ready = True
                self._cond.notify()
488
489#
490# Class whose instances are returned by `Pool.imap()`
491#
492
class IMapIterator(object):
    '''
    Iterator over task results, yielded in task-index order.

    Results arriving early are parked in `_unsorted` until every lower
    index has been delivered.  The object registers itself in `cache` on
    creation and removes itself once the delivered count equals the length
    announced through `_set_length()`.
    '''

    def __init__(self, cache):
        self._cache = cache
        self._job = next(job_counter)
        self._cond = threading.Condition(threading.Lock())
        self._items = collections.deque()
        self._unsorted = {}
        self._index = 0
        self._length = None
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        '''
        Return the next result value, or raise it if that task failed.

        Raises StopIteration when all results have been consumed and
        TimeoutError if nothing is available after waiting `timeout` seconds.
        '''
        with self._cond:
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            success, value = self._items.popleft()

        if not success:
            raise value
        return value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        '''Accept the outcome `obj` of task `i` and release any results now in order.'''
        with self._cond:
            if self._index != i:
                self._unsorted[i] = obj
            else:
                self._items.append(obj)
                self._index += 1
                # flush parked results that directly follow
                while self._index in self._unsorted:
                    self._items.append(self._unsorted.pop(self._index))
                    self._index += 1
                self._cond.notify()

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        '''Record the total number of tasks; finish up if all have been delivered.'''
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
561
562#
563# Class whose instances are returned by `Pool.imap_unordered()`
564#
565
class IMapUnorderedIterator(IMapIterator):
    '''
    Variant of `IMapIterator` that hands results out in arrival order.
    '''

    def _set(self, i, obj):
        '''Queue `obj` immediately; the task index `i` is ignored.'''
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            if self._index == self._length:
                del self._cache[self._job]
578
579#
580#
581#
582
class ThreadPool(Pool):
    '''
    `Pool` variant whose workers are created with the `Process` class from
    the sibling `dummy` module and which communicates through `queue.Queue`
    objects.
    '''

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        '''Create the in/out queues and expose their put/get as the quick accessors.'''
        inq = queue.Queue()
        outq = queue.Queue()
        self._inqueue = inq
        self._outqueue = outq
        self._quick_put = inq.put
        self._quick_get = outq.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        '''
        Replace whatever is pending in `inqueue` with `size` None sentinels
        and wake all waiting readers.  `task_handler` is not used.
        '''
        # put sentinels at head of inqueue to make workers finish
        not_empty = inqueue.not_empty
        with not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend([None] * size)
            not_empty.notify_all()