# -*- coding: utf-8 -*-
# This file should be kept compatible with both Python 2.6 and Python >= 3.0.

"""
ccbench, a Python concurrency benchmark.
"""

from __future__ import division
from __future__ import print_function

import time
import os
import sys
import functools
import itertools
import threading
import subprocess
import socket
from optparse import OptionParser, SUPPRESS_HELP
import platform

# Compatibility shims: on Python 3, alias the removed xrange to range; on
# Python 2, rebind map to the lazy itertools.imap so both versions iterate
# lazily.
try:
    xrange
except NameError:
    xrange = range

try:
    map = itertools.imap
except AttributeError:
    pass


THROUGHPUT_DURATION = 2.0        # seconds each throughput measurement runs

LATENCY_PING_INTERVAL = 0.1      # seconds between two pings from the client
LATENCY_DURATION = 2.0           # approximate total duration of one latency run


def task_pidigits():
    """Pi calculation (Python)"""
    _map = map
    _count = itertools.count
    _islice = itertools.islice

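    # The digits come from an unbounded spigot algorithm (essentially Gibbons'
    # streaming algorithm): the state z = (q, r, s, t) encodes the linear
    # fractional transformation x -> (q*x + r) / (s*x + t); compose() chains
    # two such transformations and extract() evaluates one at a small integer
    # to predict the next digit.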
    def calc_ndigits(n):
        # From http://shootout.alioth.debian.org/
        def gen_x():
            return _map(lambda k: (k, 4*k + 2, 0, 2*k + 1), _count(1))

        def compose(a, b):
            aq, ar, as_, at = a
            bq, br, bs, bt = b
            return (aq * bq,
                    aq * br + ar * bt,
                    as_ * bq + at * bs,
                    as_ * br + at * bt)

        def extract(z, j):
            q, r, s, t = z
            return (q*j + r) // (s*j + t)

        def pi_digits():
            z = (1, 0, 0, 1)
            x = gen_x()
            while 1:
                y = extract(z, 3)
                while y != extract(z, 4):
                    z = compose(z, next(x))
                    y = extract(z, 3)
                z = compose((10, -10*y, 0, 1), z)
                yield y

        return list(_islice(pi_digits(), n))

    return calc_ndigits, (50, )

def task_regex():
    """regular expression (C)"""
    # XXX this task gives horrendous latency results.
    import re
    # Taken from the `inspect` module
    pat = re.compile(r'^(\s*def\s)|(.*(?<!\w)lambda(:|\s))|^(\s*@)', re.MULTILINE)
    with open(__file__, "r") as f:
        arg = f.read(2000)

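    # Note: the instrumented findall() below is unused leftover debug code;
    # the task returns the plain pat.findall instead.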
    def findall(s):
        t = time.time()
        try:
            return pat.findall(s)
        finally:
            print(time.time() - t)
    return pat.findall, (arg, )

def task_sort():
    """list sorting (C)"""
    def list_sort(l):
        l = l[::-1]
        l.sort()

    return list_sort, (list(range(1000)), )

def task_compress_zlib():
    """zlib compression (C)"""
    import zlib
    with open(__file__, "rb") as f:
        arg = f.read(5000) * 3

    def compress(s):
        zlib.decompress(zlib.compress(s, 5))
    return compress, (arg, )

def task_compress_bz2():
    """bz2 compression (C)"""
    import bz2
    with open(__file__, "rb") as f:
        arg = f.read(3000) * 2

    def compress(s):
        bz2.compress(s)
    return compress, (arg, )

def task_hashing():
    """SHA1 hashing (C)"""
    import hashlib
    with open(__file__, "rb") as f:
        arg = f.read(5000) * 30

    def compute(s):
        hashlib.sha1(s).digest()
    return compute, (arg, )


throughput_tasks = [task_pidigits, task_regex]
for mod in 'bz2', 'hashlib':
    try:
        globals()[mod] = __import__(mod)
    except ImportError:
        globals()[mod] = None

# For whatever reason, zlib gives irregular results, so we prefer bz2 or
# hashlib if available.
# (NOTE: hashlib releases the GIL from 2.7 and 3.1 onwards)
if bz2 is not None:
    throughput_tasks.append(task_compress_bz2)
elif hashlib is not None:
    throughput_tasks.append(task_hashing)
else:
    throughput_tasks.append(task_compress_zlib)

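# The latency tests reuse the same CPU-bound tasks as background load.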
latency_tasks = throughput_tasks


class TimedLoop:
    def __init__(self, func, args):
        self.func = func
        self.args = args

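    # __call__ runs func(*args) in batches of `step` iterations, growing the
    # batch size while batches finish quickly so that the time.time() calls
    # add little overhead.  `end_event` is a plain list used as a cross-thread
    # flag: a loop that reaches min_duration appends to it and returns, and a
    # loop that finds it non-empty stops too, returning its last complete
    # measurement as (niters, duration).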
    def __call__(self, start_time, min_duration, end_event, do_yield=False):
        step = 20
        niters = 0
        duration = 0.0
        _time = time.time
        _sleep = time.sleep
        _func = self.func
        _args = self.args
        t1 = start_time
        while True:
            for i in range(step):
                _func(*_args)
            t2 = _time()
            # If another thread terminated, the current measurement is invalid
            # => return the previous one.
            if end_event:
                return niters, duration
            niters += step
            duration = t2 - start_time
            if duration >= min_duration:
                end_event.append(None)
                return niters, duration
            if t2 - t1 < 0.01:
                # Minimize interference of measurement on overall runtime
                step = step * 3 // 2
            elif do_yield:
                # OS scheduling of Python threads is sometimes so bad that we
                # have to force thread switching ourselves, otherwise we get
                # completely useless results.
                _sleep(0.0001)
            t1 = t2


def run_throughput_test(func, args, nthreads):
    assert nthreads >= 1

    # Warm up
    func(*args)

    results = []
    loop = TimedLoop(func, args)
    end_event = []

    if nthreads == 1:
        # Pure single-threaded performance, without any switching or
        # synchronization overhead.
        start_time = time.time()
        results.append(loop(start_time, THROUGHPUT_DURATION,
                            end_event, do_yield=False))
        return results

    started = False
    ready_cond = threading.Condition()
    start_cond = threading.Condition()
    ready = []

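    # Start-up handshake: each worker registers itself in `ready` under
    # ready_cond, then blocks on start_cond.  The main thread waits until all
    # workers have checked in, records start_time and flips `started`, so all
    # threads begin their timed loop at (nearly) the same instant and thread
    # start-up cost is kept out of the measurement.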
    def run():
        with ready_cond:
            ready.append(None)
            ready_cond.notify()
        with start_cond:
            while not started:
                start_cond.wait()
        results.append(loop(start_time, THROUGHPUT_DURATION,
                            end_event, do_yield=True))

    threads = []
    for i in range(nthreads):
        threads.append(threading.Thread(target=run))
    for t in threads:
        t.setDaemon(True)
        t.start()
    # We don't want measurements to include thread startup overhead,
    # so we arrange for timing to start after all threads are ready.
    with ready_cond:
        while len(ready) < nthreads:
            ready_cond.wait()
    with start_cond:
        start_time = time.time()
        started = True
        start_cond.notify(nthreads)
    for t in threads:
        t.join()

    return results

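# Throughput is reported as the total number of iterations done by all
# threads divided by the longest individual thread duration (a pessimistic
# estimate), and, for more than one thread, also as a percentage of the
# single-threaded baseline.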
def run_throughput_tests(max_threads):
    for task in throughput_tasks:
        print(task.__doc__)
        print()
        func, args = task()
        nthreads = 1
        baseline_speed = None
        while nthreads <= max_threads:
            results = run_throughput_test(func, args, nthreads)
            # Taking the max duration rather than average gives pessimistic
            # results rather than optimistic.
            speed = sum(r[0] for r in results) / max(r[1] for r in results)
            print("threads=%d: %d" % (nthreads, speed), end="")
            if baseline_speed is None:
                print(" iterations/s.")
                baseline_speed = speed
            else:
                print(" ( %d %%)" % (speed / baseline_speed * 100))
            nthreads += 1
        print()


LAT_END = "END"

def _sendto(sock, s, addr):
    sock.sendto(s.encode('ascii'), addr)

def _recv(sock, n):
    return sock.recv(n).decode('ascii')

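# Latency protocol: a child process (the "latency client") periodically sends
# UDP datagrams containing repr(time.time()) to the parent.  The parent
# timestamps each datagram on receipt; since both processes run on the same
# machine and clock, recv_time - send_time is the reported latency.  The
# LAT_END sentinel marks the end of a run.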
def latency_client(addr, nb_pings, interval):
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    _time = time.time
    _sleep = time.sleep
    def _ping():
        _sendto(sock, "%r\n" % _time(), addr)
    # The first ping signals the parent process that we are ready.
    _ping()
    # We give the parent a bit of time to notice.
    _sleep(1.0)
    for i in range(nb_pings):
        _sleep(interval)
        _ping()
    _sendto(sock, LAT_END + "\n", addr)

def run_latency_client(**kwargs):
    cmd_line = [sys.executable, '-E', os.path.abspath(__file__)]
    cmd_line.extend(['--latclient', repr(kwargs)])
    return subprocess.Popen(cmd_line) #, stdin=subprocess.PIPE,
                                      #stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

def run_latency_test(func, args, nthreads):
    # Create a listening socket to receive the pings. We use UDP which should
    # be painlessly cross-platform.
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.bind(("127.0.0.1", 0))
    addr = sock.getsockname()

    interval = LATENCY_PING_INTERVAL
    duration = LATENCY_DURATION
    nb_pings = int(duration / interval)

    results = []
    threads = []
    end_event = []
    start_cond = threading.Condition()
    started = False
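    # With nthreads == 0 there is no background CPU load at all, which gives
    # the baseline latency; otherwise nthreads background threads run the CPU
    # task while the pings are being timed.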
    if nthreads > 0:
        # Warm up
        func(*args)

        results = []
        loop = TimedLoop(func, args)
        ready = []
        ready_cond = threading.Condition()

        def run():
            with ready_cond:
                ready.append(None)
                ready_cond.notify()
            with start_cond:
                while not started:
                    start_cond.wait()
            loop(start_time, duration * 1.5, end_event, do_yield=False)

        for i in range(nthreads):
            threads.append(threading.Thread(target=run))
        for t in threads:
            t.setDaemon(True)
            t.start()
        # Wait for threads to be ready
        with ready_cond:
            while len(ready) < nthreads:
                ready_cond.wait()

    # Run the client and wait for the first ping(s) to arrive before
    # unblocking the background threads.
    chunks = []
    process = run_latency_client(addr=sock.getsockname(),
                                 nb_pings=nb_pings, interval=interval)
    s = _recv(sock, 4096)
    _time = time.time

    with start_cond:
        start_time = _time()
        started = True
        start_cond.notify(nthreads)

    while LAT_END not in s:
        s = _recv(sock, 4096)
        t = _time()
        chunks.append((t, s))

    # Tell the background threads to stop.
    end_event.append(None)
    for t in threads:
        t.join()
    process.wait()

    for recv_time, chunk in chunks:
        # NOTE: it is assumed that a line sent by a client wasn't received
        # in two chunks because the lines are very small.
        for line in chunk.splitlines():
            line = line.strip()
            if line and line != LAT_END:
                send_time = eval(line)
                assert isinstance(send_time, float)
                results.append((send_time, recv_time))

    return results

def run_latency_tests(max_threads):
    for task in latency_tasks:
        print("Background CPU task:", task.__doc__)
        print()
        func, args = task()
        nthreads = 0
        while nthreads <= max_threads:
            results = run_latency_test(func, args, nthreads)
            n = len(results)
            # We print out milliseconds
            lats = [1000 * (t2 - t1) for (t1, t2) in results]
            #print(list(map(int, lats)))
            avg = sum(lats) / n
            dev = (sum((x - avg) ** 2 for x in lats) / n) ** 0.5
            print("CPU threads=%d: %d ms. (std dev: %d ms.)" % (nthreads, avg, dev), end="")
            print()
            #print(" [... from %d samples]" % n)
            nthreads += 1
        print()


def main():
    usage = "usage: %prog [-h|--help] [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-t", "--throughput",
                      action="store_true", dest="throughput", default=False,
                      help="run throughput tests")
    parser.add_option("-l", "--latency",
                      action="store_true", dest="latency", default=False,
                      help="run latency tests")
    parser.add_option("-i", "--interval",
                      action="store", type="int", dest="check_interval", default=None,
                      help="sys.setcheckinterval() value")
    parser.add_option("-I", "--switch-interval",
                      action="store", type="float", dest="switch_interval", default=None,
                      help="sys.setswitchinterval() value")
    parser.add_option("-n", "--num-threads",
                      action="store", type="int", dest="nthreads", default=4,
                      help="max number of threads in tests")

    # Hidden option to run the pinging client
    parser.add_option("", "--latclient",
                      action="store", dest="latclient", default=None,
                      help=SUPPRESS_HELP)

    options, args = parser.parse_args()
    if args:
        parser.error("unexpected arguments")

    if options.latclient:
        kwargs = eval(options.latclient)
        latency_client(**kwargs)
        return

    if not options.throughput and not options.latency:
        options.throughput = options.latency = True
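    # Note: sys.setcheckinterval() tunes the legacy GIL (Python 2.x / 3.1);
    # sys.setswitchinterval() only exists on interpreters with the new GIL
    # (Python 3.2 and later), so -I will fail with AttributeError elsewhere.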
    if options.check_interval:
        sys.setcheckinterval(options.check_interval)
    if options.switch_interval:
        sys.setswitchinterval(options.switch_interval)

    print("== %s %s (%s) ==" % (
        platform.python_implementation(),
        platform.python_version(),
        platform.python_build()[0],
    ))
    # Processor identification often has repeated spaces
    cpu = ' '.join(platform.processor().split())
    print("== %s %s on '%s' ==" % (
        platform.machine(),
        platform.system(),
        cpu,
    ))
    print()

    if options.throughput:
        print("--- Throughput ---")
        print()
        run_throughput_tests(options.nthreads)

    if options.latency:
        print("--- Latency ---")
        print()
        run_latency_tests(options.nthreads)

if __name__ == "__main__":
    main()