| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 1 | # | 
|  | 2 | # Module providing the `Pool` class for managing a process pool | 
|  | 3 | # | 
|  | 4 | # multiprocessing/pool.py | 
|  | 5 | # | 
| R. David Murray | 3fc969a | 2010-12-14 01:38:16 +0000 | [diff] [blame] | 6 | # Copyright (c) 2006-2008, R Oudkerk | 
| Richard Oudkerk | 3e268aa | 2012-04-30 12:13:55 +0100 | [diff] [blame] | 7 | # Licensed to PSF under a Contributor Agreement. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 8 | # | 
|  | 9 |  | 
# Names exported by a star-import of this module.
__all__ = ['Pool']
|  | 11 |  | 
|  | 12 | # | 
|  | 13 | # Imports | 
|  | 14 | # | 
|  | 15 |  | 
|  | 16 | import threading | 
|  | 17 | import queue | 
|  | 18 | import itertools | 
|  | 19 | import collections | 
|  | 20 | import time | 
|  | 21 |  | 
|  | 22 | from multiprocessing import Process, cpu_count, TimeoutError | 
|  | 23 | from multiprocessing.util import Finalize, debug | 
|  | 24 |  | 
|  | 25 | # | 
|  | 26 | # Constants representing the state of a pool | 
|  | 27 | # | 
|  | 28 |  | 
# RUN is the only falsy state: the handler threads test `if thread._state:`
# to detect that the pool is no longer running.
RUN = 0         # pool accepts new work; submitting in any other state raises
CLOSE = 1       # set by Pool.close()
TERMINATE = 2   # set by Pool.terminate() / Pool._terminate_pool()
|  | 32 |  | 
|  | 33 | # | 
|  | 34 | # Miscellaneous | 
|  | 35 | # | 
|  | 36 |  | 
# Source of job ids: ApplyResult.__init__ draws `next(job_counter)` for each
# new result object and uses it as the key in the pool's cache.
job_counter = itertools.count()
|  | 38 |  | 
def mapstar(args):
    """Run ``map`` on an unpacked argument tuple and return a list.

    `args` is unpacked straight into ``map``, so it is expected to be
    ``(func, iterable, ...)``.  The lazy map object is materialised so a
    concrete list is returned.
    """
    mapped = map(*args)
    return list(mapped)
|  | 41 |  | 
def starmapstar(args):
    """Call ``args[0]`` once per element of ``args[1]``, unpacking each element.

    Every element of ``args[1]`` is itself unpacked into positional
    arguments, so ``(f, [(a, b), (c, d)])`` yields ``[f(a, b), f(c, d)]``.
    """
    func = args[0]
    return [func(*params) for params in args[1]]
|  | 44 |  | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 45 | # | 
|  | 46 | # Code run by worker processes | 
|  | 47 | # | 
|  | 48 |  | 
class MaybeEncodingError(Exception):
    """Wraps possible unpickleable errors, so they can be
    safely sent through the socket.

    Only the ``repr()`` strings of the original exception and of the value
    are stored, never the objects themselves.
    """

    def __init__(self, exc, value):
        exc_repr, value_repr = repr(exc), repr(value)
        self.exc = exc_repr
        self.value = value_repr
        super().__init__(exc_repr, value_repr)

    def __str__(self):
        template = "Error sending result: '%s'. Reason: '%s'"
        return template % (self.value, self.exc)

    def __repr__(self):
        return "<MaybeEncodingError: %s>" % str(self)
|  | 64 |  | 
|  | 65 |  | 
def worker(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """Main loop executed inside each pool worker.

    Tasks are read from `inqueue` as ``(job, i, func, args, kwds)`` tuples
    and answered on `outqueue` with ``(job, i, (success, value))`` where
    `value` is the return value of ``func(*args, **kwds)`` or the exception
    it raised.

    inqueue, outqueue -- objects providing ``get()`` / ``put()``.  If
        `inqueue` has a ``_writer`` attribute, the pipe ends this side does
        not use (``inqueue._writer`` and ``outqueue._reader``) are closed.
    initializer, initargs -- if `initializer` is not None it is called once
        as ``initializer(*initargs)`` before any task is processed.
    maxtasks -- None for no limit, otherwise a positive int; the loop ends
        after that many tasks have been completed.

    The loop also ends when a ``None`` sentinel is received or when reading
    from `inqueue` raises EOFError/IOError.

    Raises AssertionError if `maxtasks` is neither None nor a positive int.
    """
    # Raised explicitly (rather than via `assert`) so the check is still
    # performed when Python runs with -O; the exception type is unchanged.
    if maxtasks is not None and not (isinstance(maxtasks, int)
                                     and maxtasks > 0):
        raise AssertionError(
            "maxtasks must be None or a positive int, not %r" % (maxtasks,))
    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    completed = 0
    while maxtasks is None or completed < maxtasks:
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            result = (False, e)
        try:
            put((job, i, result))
        except Exception as e:
            # Sending the result failed; report a wrapper that carries only
            # repr() strings of the failure and of the value instead.
            wrapped = MaybeEncodingError(e, result[1])
            debug("Possible encoding error while sending result: %s" % (
                wrapped))
            put((job, i, (False, wrapped)))
        completed += 1
    debug('worker exiting after %d tasks' % completed)
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 103 |  | 
|  | 104 | # | 
|  | 105 | # Class representing a process pool | 
|  | 106 | # | 
|  | 107 |  | 
|  | 108 | class Pool(object): | 
|  | 109 | ''' | 
| Georg Brandl | 9290503 | 2008-11-22 08:51:39 +0000 | [diff] [blame] | 110 | Class which supports an async version of applying functions to arguments. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 111 | ''' | 
|  | 112 | Process = Process | 
|  | 113 |  | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 114 | def __init__(self, processes=None, initializer=None, initargs=(), | 
|  | 115 | maxtasksperchild=None): | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 116 | self._setup_queues() | 
|  | 117 | self._taskqueue = queue.Queue() | 
|  | 118 | self._cache = {} | 
|  | 119 | self._state = RUN | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 120 | self._maxtasksperchild = maxtasksperchild | 
|  | 121 | self._initializer = initializer | 
|  | 122 | self._initargs = initargs | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 123 |  | 
|  | 124 | if processes is None: | 
|  | 125 | try: | 
|  | 126 | processes = cpu_count() | 
|  | 127 | except NotImplementedError: | 
|  | 128 | processes = 1 | 
| Victor Stinner | 2fae27b | 2011-06-20 17:53:35 +0200 | [diff] [blame] | 129 | if processes < 1: | 
|  | 130 | raise ValueError("Number of processes must be at least 1") | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 131 |  | 
| Florent Xicluna | 5d1155c | 2011-10-28 14:45:05 +0200 | [diff] [blame] | 132 | if initializer is not None and not callable(initializer): | 
| Benjamin Peterson | f47ed4a | 2009-04-11 20:45:40 +0000 | [diff] [blame] | 133 | raise TypeError('initializer must be a callable') | 
|  | 134 |  | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 135 | self._processes = processes | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 136 | self._pool = [] | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 137 | self._repopulate_pool() | 
|  | 138 |  | 
|  | 139 | self._worker_handler = threading.Thread( | 
|  | 140 | target=Pool._handle_workers, | 
|  | 141 | args=(self, ) | 
|  | 142 | ) | 
|  | 143 | self._worker_handler.daemon = True | 
|  | 144 | self._worker_handler._state = RUN | 
|  | 145 | self._worker_handler.start() | 
|  | 146 |  | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 147 |  | 
|  | 148 | self._task_handler = threading.Thread( | 
|  | 149 | target=Pool._handle_tasks, | 
| Richard Oudkerk | e90cedb | 2013-10-28 23:11:58 +0000 | [diff] [blame] | 150 | args=(self._taskqueue, self._quick_put, self._outqueue, | 
|  | 151 | self._pool, self._cache) | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 152 | ) | 
| Benjamin Peterson | fae4c62 | 2008-08-18 18:40:08 +0000 | [diff] [blame] | 153 | self._task_handler.daemon = True | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 154 | self._task_handler._state = RUN | 
|  | 155 | self._task_handler.start() | 
|  | 156 |  | 
|  | 157 | self._result_handler = threading.Thread( | 
|  | 158 | target=Pool._handle_results, | 
|  | 159 | args=(self._outqueue, self._quick_get, self._cache) | 
|  | 160 | ) | 
| Benjamin Peterson | fae4c62 | 2008-08-18 18:40:08 +0000 | [diff] [blame] | 161 | self._result_handler.daemon = True | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 162 | self._result_handler._state = RUN | 
|  | 163 | self._result_handler.start() | 
|  | 164 |  | 
|  | 165 | self._terminate = Finalize( | 
|  | 166 | self, self._terminate_pool, | 
|  | 167 | args=(self._taskqueue, self._inqueue, self._outqueue, self._pool, | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 168 | self._worker_handler, self._task_handler, | 
|  | 169 | self._result_handler, self._cache), | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 170 | exitpriority=15 | 
|  | 171 | ) | 
|  | 172 |  | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 173 | def _join_exited_workers(self): | 
|  | 174 | """Cleanup after any worker processes which have exited due to reaching | 
|  | 175 | their specified lifetime.  Returns True if any workers were cleaned up. | 
|  | 176 | """ | 
|  | 177 | cleaned = False | 
|  | 178 | for i in reversed(range(len(self._pool))): | 
|  | 179 | worker = self._pool[i] | 
|  | 180 | if worker.exitcode is not None: | 
|  | 181 | # worker exited | 
|  | 182 | debug('cleaning up worker %d' % i) | 
|  | 183 | worker.join() | 
|  | 184 | cleaned = True | 
|  | 185 | del self._pool[i] | 
|  | 186 | return cleaned | 
|  | 187 |  | 
|  | 188 | def _repopulate_pool(self): | 
|  | 189 | """Bring the number of pool processes up to the specified number, | 
|  | 190 | for use after reaping workers which have exited. | 
|  | 191 | """ | 
|  | 192 | for i in range(self._processes - len(self._pool)): | 
|  | 193 | w = self.Process(target=worker, | 
|  | 194 | args=(self._inqueue, self._outqueue, | 
|  | 195 | self._initializer, | 
|  | 196 | self._initargs, self._maxtasksperchild) | 
|  | 197 | ) | 
|  | 198 | self._pool.append(w) | 
|  | 199 | w.name = w.name.replace('Process', 'PoolWorker') | 
|  | 200 | w.daemon = True | 
|  | 201 | w.start() | 
|  | 202 | debug('added worker') | 
|  | 203 |  | 
|  | 204 | def _maintain_pool(self): | 
|  | 205 | """Clean up any exited workers and start replacements for them. | 
|  | 206 | """ | 
|  | 207 | if self._join_exited_workers(): | 
|  | 208 | self._repopulate_pool() | 
|  | 209 |  | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 210 | def _setup_queues(self): | 
|  | 211 | from .queues import SimpleQueue | 
|  | 212 | self._inqueue = SimpleQueue() | 
|  | 213 | self._outqueue = SimpleQueue() | 
|  | 214 | self._quick_put = self._inqueue._writer.send | 
|  | 215 | self._quick_get = self._outqueue._reader.recv | 
|  | 216 |  | 
|  | 217 | def apply(self, func, args=(), kwds={}): | 
|  | 218 | ''' | 
| Georg Brandl | 9290503 | 2008-11-22 08:51:39 +0000 | [diff] [blame] | 219 | Equivalent of `func(*args, **kwds)`. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 220 | ''' | 
|  | 221 | assert self._state == RUN | 
|  | 222 | return self.apply_async(func, args, kwds).get() | 
|  | 223 |  | 
|  | 224 | def map(self, func, iterable, chunksize=None): | 
|  | 225 | ''' | 
| Georg Brandl | 9290503 | 2008-11-22 08:51:39 +0000 | [diff] [blame] | 226 | Apply `func` to each element in `iterable`, collecting the results | 
|  | 227 | in a list that is returned. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 228 | ''' | 
| Antoine Pitrou | de911b2 | 2011-12-21 11:03:24 +0100 | [diff] [blame] | 229 | return self._map_async(func, iterable, mapstar, chunksize).get() | 
|  | 230 |  | 
|  | 231 | def starmap(self, func, iterable, chunksize=None): | 
|  | 232 | ''' | 
|  | 233 | Like `map()` method but the elements of the `iterable` are expected to | 
|  | 234 | be iterables as well and will be unpacked as arguments. Hence | 
|  | 235 | `func` and (a, b) becomes func(a, b). | 
|  | 236 | ''' | 
| Antoine Pitrou | de911b2 | 2011-12-21 11:03:24 +0100 | [diff] [blame] | 237 | return self._map_async(func, iterable, starmapstar, chunksize).get() | 
|  | 238 |  | 
|  | 239 | def starmap_async(self, func, iterable, chunksize=None, callback=None, | 
|  | 240 | error_callback=None): | 
|  | 241 | ''' | 
|  | 242 | Asynchronous version of `starmap()` method. | 
|  | 243 | ''' | 
| Antoine Pitrou | de911b2 | 2011-12-21 11:03:24 +0100 | [diff] [blame] | 244 | return self._map_async(func, iterable, starmapstar, chunksize, | 
|  | 245 | callback, error_callback) | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 246 |  | 
|  | 247 | def imap(self, func, iterable, chunksize=1): | 
|  | 248 | ''' | 
| Georg Brandl | 9290503 | 2008-11-22 08:51:39 +0000 | [diff] [blame] | 249 | Equivalent of `map()` -- can be MUCH slower than `Pool.map()`. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 250 | ''' | 
| Benjamin Peterson | 3095f47 | 2012-09-25 12:45:42 -0400 | [diff] [blame] | 251 | if self._state != RUN: | 
|  | 252 | raise ValueError("Pool not running") | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 253 | if chunksize == 1: | 
|  | 254 | result = IMapIterator(self._cache) | 
|  | 255 | self._taskqueue.put((((result._job, i, func, (x,), {}) | 
|  | 256 | for i, x in enumerate(iterable)), result._set_length)) | 
|  | 257 | return result | 
|  | 258 | else: | 
|  | 259 | assert chunksize > 1 | 
|  | 260 | task_batches = Pool._get_tasks(func, iterable, chunksize) | 
|  | 261 | result = IMapIterator(self._cache) | 
|  | 262 | self._taskqueue.put((((result._job, i, mapstar, (x,), {}) | 
|  | 263 | for i, x in enumerate(task_batches)), result._set_length)) | 
|  | 264 | return (item for chunk in result for item in chunk) | 
|  | 265 |  | 
|  | 266 | def imap_unordered(self, func, iterable, chunksize=1): | 
|  | 267 | ''' | 
| Georg Brandl | 9290503 | 2008-11-22 08:51:39 +0000 | [diff] [blame] | 268 | Like `imap()` method but ordering of results is arbitrary. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 269 | ''' | 
| Benjamin Peterson | 3095f47 | 2012-09-25 12:45:42 -0400 | [diff] [blame] | 270 | if self._state != RUN: | 
|  | 271 | raise ValueError("Pool not running") | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 272 | if chunksize == 1: | 
|  | 273 | result = IMapUnorderedIterator(self._cache) | 
|  | 274 | self._taskqueue.put((((result._job, i, func, (x,), {}) | 
|  | 275 | for i, x in enumerate(iterable)), result._set_length)) | 
|  | 276 | return result | 
|  | 277 | else: | 
|  | 278 | assert chunksize > 1 | 
|  | 279 | task_batches = Pool._get_tasks(func, iterable, chunksize) | 
|  | 280 | result = IMapUnorderedIterator(self._cache) | 
|  | 281 | self._taskqueue.put((((result._job, i, mapstar, (x,), {}) | 
|  | 282 | for i, x in enumerate(task_batches)), result._set_length)) | 
|  | 283 | return (item for chunk in result for item in chunk) | 
|  | 284 |  | 
| Ask Solem | 2afcbf2 | 2010-11-09 20:55:52 +0000 | [diff] [blame] | 285 | def apply_async(self, func, args=(), kwds={}, callback=None, | 
|  | 286 | error_callback=None): | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 287 | ''' | 
| Georg Brandl | 9290503 | 2008-11-22 08:51:39 +0000 | [diff] [blame] | 288 | Asynchronous version of `apply()` method. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 289 | ''' | 
| Benjamin Peterson | 3095f47 | 2012-09-25 12:45:42 -0400 | [diff] [blame] | 290 | if self._state != RUN: | 
|  | 291 | raise ValueError("Pool not running") | 
| Ask Solem | 2afcbf2 | 2010-11-09 20:55:52 +0000 | [diff] [blame] | 292 | result = ApplyResult(self._cache, callback, error_callback) | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 293 | self._taskqueue.put(([(result._job, None, func, args, kwds)], None)) | 
|  | 294 | return result | 
|  | 295 |  | 
| Ask Solem | 2afcbf2 | 2010-11-09 20:55:52 +0000 | [diff] [blame] | 296 | def map_async(self, func, iterable, chunksize=None, callback=None, | 
|  | 297 | error_callback=None): | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 298 | ''' | 
| Georg Brandl | 9290503 | 2008-11-22 08:51:39 +0000 | [diff] [blame] | 299 | Asynchronous version of `map()` method. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 300 | ''' | 
| Hynek Schlawack | 254af26 | 2012-10-27 12:53:02 +0200 | [diff] [blame] | 301 | return self._map_async(func, iterable, mapstar, chunksize, callback, | 
|  | 302 | error_callback) | 
| Antoine Pitrou | de911b2 | 2011-12-21 11:03:24 +0100 | [diff] [blame] | 303 |  | 
|  | 304 | def _map_async(self, func, iterable, mapper, chunksize=None, callback=None, | 
|  | 305 | error_callback=None): | 
|  | 306 | ''' | 
|  | 307 | Helper function to implement map, starmap and their async counterparts. | 
|  | 308 | ''' | 
| Benjamin Peterson | 3095f47 | 2012-09-25 12:45:42 -0400 | [diff] [blame] | 309 | if self._state != RUN: | 
|  | 310 | raise ValueError("Pool not running") | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 311 | if not hasattr(iterable, '__len__'): | 
|  | 312 | iterable = list(iterable) | 
|  | 313 |  | 
|  | 314 | if chunksize is None: | 
|  | 315 | chunksize, extra = divmod(len(iterable), len(self._pool) * 4) | 
|  | 316 | if extra: | 
|  | 317 | chunksize += 1 | 
| Alexandre Vassalotti | e52e378 | 2009-07-17 09:18:18 +0000 | [diff] [blame] | 318 | if len(iterable) == 0: | 
|  | 319 | chunksize = 0 | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 320 |  | 
|  | 321 | task_batches = Pool._get_tasks(func, iterable, chunksize) | 
| Ask Solem | 2afcbf2 | 2010-11-09 20:55:52 +0000 | [diff] [blame] | 322 | result = MapResult(self._cache, chunksize, len(iterable), callback, | 
|  | 323 | error_callback=error_callback) | 
| Antoine Pitrou | de911b2 | 2011-12-21 11:03:24 +0100 | [diff] [blame] | 324 | self._taskqueue.put((((result._job, i, mapper, (x,), {}) | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 325 | for i, x in enumerate(task_batches)), None)) | 
|  | 326 | return result | 
|  | 327 |  | 
|  | 328 | @staticmethod | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 329 | def _handle_workers(pool): | 
| Charles-François Natali | f8859e1 | 2011-10-24 18:45:29 +0200 | [diff] [blame] | 330 | thread = threading.current_thread() | 
|  | 331 |  | 
|  | 332 | # Keep maintaining workers until the cache gets drained, unless the pool | 
|  | 333 | # is terminated. | 
|  | 334 | while thread._state == RUN or (pool._cache and thread._state != TERMINATE): | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 335 | pool._maintain_pool() | 
|  | 336 | time.sleep(0.1) | 
| Antoine Pitrou | 81dee6b | 2011-04-11 00:18:59 +0200 | [diff] [blame] | 337 | # send sentinel to stop workers | 
|  | 338 | pool._taskqueue.put(None) | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 339 | debug('worker handler exiting') | 
|  | 340 |  | 
|  | 341 | @staticmethod | 
| Richard Oudkerk | e90cedb | 2013-10-28 23:11:58 +0000 | [diff] [blame] | 342 | def _handle_tasks(taskqueue, put, outqueue, pool, cache): | 
| Benjamin Peterson | 672b803 | 2008-06-11 19:14:14 +0000 | [diff] [blame] | 343 | thread = threading.current_thread() | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 344 |  | 
|  | 345 | for taskseq, set_length in iter(taskqueue.get, None): | 
|  | 346 | i = -1 | 
|  | 347 | for i, task in enumerate(taskseq): | 
|  | 348 | if thread._state: | 
|  | 349 | debug('task handler found thread._state != RUN') | 
|  | 350 | break | 
|  | 351 | try: | 
|  | 352 | put(task) | 
| Richard Oudkerk | e90cedb | 2013-10-28 23:11:58 +0000 | [diff] [blame] | 353 | except Exception as e: | 
|  | 354 | job, ind = task[:2] | 
|  | 355 | try: | 
|  | 356 | cache[job]._set(ind, (False, e)) | 
|  | 357 | except KeyError: | 
|  | 358 | pass | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 359 | else: | 
|  | 360 | if set_length: | 
|  | 361 | debug('doing set_length()') | 
|  | 362 | set_length(i+1) | 
|  | 363 | continue | 
|  | 364 | break | 
|  | 365 | else: | 
|  | 366 | debug('task handler got sentinel') | 
|  | 367 |  | 
|  | 368 |  | 
|  | 369 | try: | 
|  | 370 | # tell result handler to finish when cache is empty | 
|  | 371 | debug('task handler sending sentinel to result handler') | 
|  | 372 | outqueue.put(None) | 
|  | 373 |  | 
|  | 374 | # tell workers there is no more work | 
|  | 375 | debug('task handler sending sentinel to workers') | 
|  | 376 | for p in pool: | 
|  | 377 | put(None) | 
|  | 378 | except IOError: | 
|  | 379 | debug('task handler got IOError when sending sentinels') | 
|  | 380 |  | 
|  | 381 | debug('task handler exiting') | 
|  | 382 |  | 
|  | 383 | @staticmethod | 
|  | 384 | def _handle_results(outqueue, get, cache): | 
| Benjamin Peterson | 672b803 | 2008-06-11 19:14:14 +0000 | [diff] [blame] | 385 | thread = threading.current_thread() | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 386 |  | 
|  | 387 | while 1: | 
|  | 388 | try: | 
|  | 389 | task = get() | 
|  | 390 | except (IOError, EOFError): | 
|  | 391 | debug('result handler got EOFError/IOError -- exiting') | 
|  | 392 | return | 
|  | 393 |  | 
|  | 394 | if thread._state: | 
|  | 395 | assert thread._state == TERMINATE | 
|  | 396 | debug('result handler found thread._state=TERMINATE') | 
|  | 397 | break | 
|  | 398 |  | 
|  | 399 | if task is None: | 
|  | 400 | debug('result handler got sentinel') | 
|  | 401 | break | 
|  | 402 |  | 
|  | 403 | job, i, obj = task | 
|  | 404 | try: | 
|  | 405 | cache[job]._set(i, obj) | 
|  | 406 | except KeyError: | 
|  | 407 | pass | 
|  | 408 |  | 
|  | 409 | while cache and thread._state != TERMINATE: | 
|  | 410 | try: | 
|  | 411 | task = get() | 
|  | 412 | except (IOError, EOFError): | 
|  | 413 | debug('result handler got EOFError/IOError -- exiting') | 
|  | 414 | return | 
|  | 415 |  | 
|  | 416 | if task is None: | 
|  | 417 | debug('result handler ignoring extra sentinel') | 
|  | 418 | continue | 
|  | 419 | job, i, obj = task | 
|  | 420 | try: | 
|  | 421 | cache[job]._set(i, obj) | 
|  | 422 | except KeyError: | 
|  | 423 | pass | 
|  | 424 |  | 
|  | 425 | if hasattr(outqueue, '_reader'): | 
|  | 426 | debug('ensuring that outqueue is not full') | 
|  | 427 | # If we don't make room available in outqueue then | 
|  | 428 | # attempts to add the sentinel (None) to outqueue may | 
|  | 429 | # block.  There is guaranteed to be no more than 2 sentinels. | 
|  | 430 | try: | 
|  | 431 | for i in range(10): | 
|  | 432 | if not outqueue._reader.poll(): | 
|  | 433 | break | 
|  | 434 | get() | 
|  | 435 | except (IOError, EOFError): | 
|  | 436 | pass | 
|  | 437 |  | 
|  | 438 | debug('result handler exiting: len(cache)=%s, thread._state=%s', | 
|  | 439 | len(cache), thread._state) | 
|  | 440 |  | 
|  | 441 | @staticmethod | 
|  | 442 | def _get_tasks(func, it, size): | 
|  | 443 | it = iter(it) | 
|  | 444 | while 1: | 
|  | 445 | x = tuple(itertools.islice(it, size)) | 
|  | 446 | if not x: | 
|  | 447 | return | 
|  | 448 | yield (func, x) | 
|  | 449 |  | 
|  | 450 | def __reduce__(self): | 
|  | 451 | raise NotImplementedError( | 
|  | 452 | 'pool objects cannot be passed between processes or pickled' | 
|  | 453 | ) | 
|  | 454 |  | 
|  | 455 | def close(self): | 
|  | 456 | debug('closing pool') | 
|  | 457 | if self._state == RUN: | 
|  | 458 | self._state = CLOSE | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 459 | self._worker_handler._state = CLOSE | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 460 |  | 
|  | 461 | def terminate(self): | 
|  | 462 | debug('terminating pool') | 
|  | 463 | self._state = TERMINATE | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 464 | self._worker_handler._state = TERMINATE | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 465 | self._terminate() | 
|  | 466 |  | 
|  | 467 | def join(self): | 
|  | 468 | debug('joining pool') | 
|  | 469 | assert self._state in (CLOSE, TERMINATE) | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 470 | self._worker_handler.join() | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 471 | self._task_handler.join() | 
|  | 472 | self._result_handler.join() | 
|  | 473 | for p in self._pool: | 
|  | 474 | p.join() | 
|  | 475 |  | 
|  | 476 | @staticmethod | 
|  | 477 | def _help_stuff_finish(inqueue, task_handler, size): | 
|  | 478 | # task_handler may be blocked trying to put items on inqueue | 
|  | 479 | debug('removing tasks from inqueue until task handler finished') | 
|  | 480 | inqueue._rlock.acquire() | 
| Benjamin Peterson | 672b803 | 2008-06-11 19:14:14 +0000 | [diff] [blame] | 481 | while task_handler.is_alive() and inqueue._reader.poll(): | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 482 | inqueue._reader.recv() | 
|  | 483 | time.sleep(0) | 
|  | 484 |  | 
|  | 485 | @classmethod | 
|  | 486 | def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 487 | worker_handler, task_handler, result_handler, cache): | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 488 | # this is guaranteed to only be called once | 
|  | 489 | debug('finalizing pool') | 
|  | 490 |  | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 491 | worker_handler._state = TERMINATE | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 492 | task_handler._state = TERMINATE | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 493 |  | 
|  | 494 | debug('helping task handler/workers to finish') | 
|  | 495 | cls._help_stuff_finish(inqueue, task_handler, len(pool)) | 
|  | 496 |  | 
| Benjamin Peterson | 672b803 | 2008-06-11 19:14:14 +0000 | [diff] [blame] | 497 | assert result_handler.is_alive() or len(cache) == 0 | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 498 |  | 
|  | 499 | result_handler._state = TERMINATE | 
|  | 500 | outqueue.put(None)                  # sentinel | 
|  | 501 |  | 
| Antoine Pitrou | 81dee6b | 2011-04-11 00:18:59 +0200 | [diff] [blame] | 502 | # We must wait for the worker handler to exit before terminating | 
|  | 503 | # workers because we don't want workers to be restarted behind our back. | 
|  | 504 | debug('joining worker handler') | 
| Richard Oudkerk | f29ec4b | 2012-06-18 15:54:57 +0100 | [diff] [blame] | 505 | if threading.current_thread() is not worker_handler: | 
|  | 506 | worker_handler.join() | 
| Antoine Pitrou | 81dee6b | 2011-04-11 00:18:59 +0200 | [diff] [blame] | 507 |  | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 508 | # Terminate workers which haven't already finished. | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 509 | if pool and hasattr(pool[0], 'terminate'): | 
|  | 510 | debug('terminating workers') | 
|  | 511 | for p in pool: | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 512 | if p.exitcode is None: | 
|  | 513 | p.terminate() | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 514 |  | 
|  | 515 | debug('joining task handler') | 
| Richard Oudkerk | f29ec4b | 2012-06-18 15:54:57 +0100 | [diff] [blame] | 516 | if threading.current_thread() is not task_handler: | 
|  | 517 | task_handler.join() | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 518 |  | 
|  | 519 | debug('joining result handler') | 
| Richard Oudkerk | f29ec4b | 2012-06-18 15:54:57 +0100 | [diff] [blame] | 520 | if threading.current_thread() is not result_handler: | 
|  | 521 | result_handler.join() | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 522 |  | 
|  | 523 | if pool and hasattr(pool[0], 'terminate'): | 
|  | 524 | debug('joining pool workers') | 
|  | 525 | for p in pool: | 
| Florent Xicluna | 998171f | 2010-03-08 13:32:17 +0000 | [diff] [blame] | 526 | if p.is_alive(): | 
| Jesse Noller | 1f0b658 | 2010-01-27 03:36:01 +0000 | [diff] [blame] | 527 | # worker has not yet exited | 
| Florent Xicluna | 998171f | 2010-03-08 13:32:17 +0000 | [diff] [blame] | 528 | debug('cleaning up worker %d' % p.pid) | 
|  | 529 | p.join() | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 530 |  | 
| Richard Oudkerk | d69cfe8 | 2012-06-18 17:47:52 +0100 | [diff] [blame] | 531 | def __enter__(self): | 
|  | 532 | return self | 
|  | 533 |  | 
|  | 534 | def __exit__(self, exc_type, exc_val, exc_tb): | 
|  | 535 | self.terminate() | 
|  | 536 |  | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 537 | # | 
|  | 538 | # Class whose instances are returned by `Pool.apply_async()` | 
|  | 539 | # | 
|  | 540 |  | 
class ApplyResult(object):
    '''
    Handle for the pending result of `Pool.apply_async()`.

    On creation the object registers itself in `cache` under a fresh job id
    drawn from `job_counter`; `_set()` stores the outcome, runs the matching
    callback, marks the result ready and removes the cache entry.
    '''

    def __init__(self, cache, callback, error_callback):
        self._event = threading.Event()
        self._job = next(job_counter)
        self._cache = cache
        self._callback = callback
        self._error_callback = error_callback
        cache[self._job] = self

    def ready(self):
        '''Return True once the outcome has been stored.'''
        return self._event.is_set()

    def successful(self):
        '''
        Return True if the call completed without raising.

        Raises AssertionError if the result is not ready yet.
        '''
        # Raised explicitly (rather than via `assert`) so the check still
        # runs under -O instead of failing on the missing `_success`.
        if not self.ready():
            raise AssertionError("result is not ready")
        return self._success

    def wait(self, timeout=None):
        '''Block until the result is ready or `timeout` seconds elapsed.'''
        self._event.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the stored value, or re-raise the stored exception.

        Raises TimeoutError if the result is not ready after waiting up to
        `timeout` seconds.
        '''
        self.wait(timeout)
        if not self.ready():
            raise TimeoutError
        if self._success:
            return self._value
        raise self._value

    def _set(self, i, obj):
        # `obj` is a (success, value) pair; `i` is unused for single calls.
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        if self._error_callback and not self._success:
            self._error_callback(self._value)
        self._event.set()
        del self._cache[self._job]

AsyncResult = ApplyResult       # create alias -- see #17805
|  | 580 |  | 
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 581 | # | 
|  | 582 | # Class whose instances are returned by `Pool.map_async()` | 
|  | 583 | # | 
|  | 584 |  | 
class MapResult(ApplyResult):
    '''
    Result object for `Pool.map_async()`: gathers per-chunk results into a
    single list of `length` items.
    '''

    def __init__(self, cache, chunksize, length, callback, error_callback):
        ApplyResult.__init__(self, cache, callback,
                             error_callback=error_callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize > 0:
            # Number of chunks still expected: ceil(length / chunksize).
            whole, leftover = divmod(length, chunksize)
            self._number_left = whole + bool(leftover)
        else:
            # Nothing to wait for: mark ready and drop the cache entry now.
            self._number_left = 0
            self._event.set()
            del cache[self._job]

    def _set(self, i, success_result):
        ok, payload = success_result
        if not ok:
            # A failed chunk ends the job with the exception as its value.
            self._success = False
            self._value = payload
            if self._error_callback:
                self._error_callback(self._value)
            del self._cache[self._job]
            self._event.set()
            return

        # Store chunk `i` at its position in the combined result list.
        start = i * self._chunksize
        self._value[start:start + self._chunksize] = payload
        self._number_left -= 1
        if self._number_left == 0:
            if self._callback:
                self._callback(self._value)
            del self._cache[self._job]
            self._event.set()
| Benjamin Peterson | e711caf | 2008-06-11 16:44:04 +0000 | [diff] [blame] | 617 |  | 
|  | 618 | # | 
|  | 619 | # Class whose instances are returned by `Pool.imap()` | 
|  | 620 | # | 
|  | 621 |  | 
class IMapIterator(object):
    """Iterator that hands out job results in index order.

    Results arrive through ``_set(i, obj)`` as ``(success, value)`` pairs.
    A result whose index is not the next expected one is parked in
    ``_unsorted`` until the gap before it has been filled.  The total
    number of results is unknown until ``_set_length()`` is called.
    """

    def __init__(self, cache):
        """Register a new iterator in *cache* under a fresh job number."""
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()   # results ready to be returned
        self._index = 0                     # number of results queued so far
        self._length = None                 # total result count, once known
        self._unsorted = {}                 # early results keyed by index
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        """Return the next result, waiting up to *timeout* for it.

        Returns:
            The value of the next result if it was successful.

        Raises:
            StopIteration: when every result has already been returned.
            TimeoutError: when no result became available after waiting.
            Exception: the value of an unsuccessful result is raised.
        """
        with self._cond:
            # Test the deque under the lock instead of catching IndexError,
            # so StopIteration/TimeoutError are raised without an unrelated
            # IndexError attached as their context.
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            item = self._items.popleft()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        """Deliver result *obj* for index *i*.

        If *i* is the next expected index, the result is queued together
        with any parked results that directly follow it, and one waiter
        is notified.  Otherwise the result is parked.  Once the number of
        queued results equals the known length, the cache entry is
        removed.
        """
        with self._cond:
            if self._index == i:
                self._items.append(obj)
                self._index += 1
                # Flush parked results that are now contiguous.
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.append(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        """Record the total number of results.

        If that many results have already been queued, one waiter is
        notified (so a blocked ``next()`` can finish) and the cache entry
        is removed.
        """
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
|  | 690 |  | 
|  | 691 | # | 
|  | 692 | # Class whose instances are returned by `Pool.imap_unordered()` | 
|  | 693 | # | 
|  | 694 |  | 
class IMapUnorderedIterator(IMapIterator):
    """Variant of `IMapIterator` that queues results in arrival order."""

    def _set(self, i, obj):
        """Queue *obj* immediately; the index *i* is not used.

        One waiter is notified for every delivered result, and the cache
        entry is removed once the number of delivered results equals the
        known length.
        """
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            all_delivered = self._index == self._length
            if all_delivered:
                del self._cache[self._job]
|  | 707 |  | 
|  | 708 | # | 
|  | 709 | # | 
|  | 710 | # | 
|  | 711 |  | 
class ThreadPool(Pool):
    """`Pool` subclass that takes `Process` from `.dummy` and uses
    `queue.Queue` objects for its input and output queues."""

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        """Forward all arguments unchanged to `Pool.__init__`."""
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        """Create the two queues and cache their put/get bound methods."""
        tasks = queue.Queue()
        results = queue.Queue()
        self._inqueue = tasks
        self._outqueue = results
        self._quick_put = tasks.put
        self._quick_get = results.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        """Replace the contents of *inqueue* with *size* ``None`` sentinels.

        *task_handler* is accepted but not used here.
        """
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend([None] * size)
            # wake every consumer blocked on the queue being empty
            inqueue.not_empty.notify_all()