blob: 0b9301284f2971742e52408a57bd29bea519a071 [file] [log] [blame]
Antoine Pitrou9a386822010-01-18 21:10:31 +00001# -*- coding: utf-8 -*-
2# This file should be kept compatible with both Python 2.6 and Python >= 3.0.
3
4from __future__ import division
5from __future__ import print_function
6
7"""
8ccbench, a Python concurrency benchmark.
9"""
10
11import time
12import os
13import sys
14import functools
15import itertools
16import threading
17import subprocess
18import socket
19from optparse import OptionParser, SUPPRESS_HELP
20import platform
21
22# Compatibility
23try:
24 xrange
25except NameError:
26 xrange = range
27
28try:
29 map = itertools.imap
30except AttributeError:
31 pass
32
33
34THROUGHPUT_DURATION = 2.0
35
36LATENCY_PING_INTERVAL = 0.1
37LATENCY_DURATION = 2.0
38
39
40def task_pidigits():
41 """Pi calculation (Python)"""
42 _map = map
43 _count = itertools.count
44 _islice = itertools.islice
45
46 def calc_ndigits(n):
47 # From http://shootout.alioth.debian.org/
48 def gen_x():
49 return _map(lambda k: (k, 4*k + 2, 0, 2*k + 1), _count(1))
50
51 def compose(a, b):
52 aq, ar, as_, at = a
53 bq, br, bs, bt = b
54 return (aq * bq,
55 aq * br + ar * bt,
56 as_ * bq + at * bs,
57 as_ * br + at * bt)
58
59 def extract(z, j):
60 q, r, s, t = z
61 return (q*j + r) // (s*j + t)
62
63 def pi_digits():
64 z = (1, 0, 0, 1)
65 x = gen_x()
66 while 1:
67 y = extract(z, 3)
68 while y != extract(z, 4):
69 z = compose(z, next(x))
70 y = extract(z, 3)
71 z = compose((10, -10*y, 0, 1), z)
72 yield y
73
74 return list(_islice(pi_digits(), n))
75
76 return calc_ndigits, (50, )
77
78def task_regex():
79 """regular expression (C)"""
80 # XXX this task gives horrendous latency results.
81 import re
82 # Taken from the `inspect` module
83 pat = re.compile(r'^(\s*def\s)|(.*(?<!\w)lambda(:|\s))|^(\s*@)', re.MULTILINE)
84 with open(__file__, "r") as f:
85 arg = f.read(2000)
86
87 def findall(s):
88 t = time.time()
89 try:
90 return pat.findall(s)
91 finally:
92 print(time.time() - t)
93 return pat.findall, (arg, )
94
95def task_sort():
96 """list sorting (C)"""
97 def list_sort(l):
98 l = l[::-1]
99 l.sort()
100
101 return list_sort, (list(range(1000)), )
102
103def task_compress_zlib():
104 """zlib compression (C)"""
105 import zlib
106 with open(__file__, "rb") as f:
107 arg = f.read(5000) * 3
108
109 def compress(s):
110 zlib.decompress(zlib.compress(s, 5))
111 return compress, (arg, )
112
113def task_compress_bz2():
114 """bz2 compression (C)"""
115 import bz2
116 with open(__file__, "rb") as f:
117 arg = f.read(3000) * 2
118
119 def compress(s):
120 bz2.compress(s)
121 return compress, (arg, )
122
123def task_hashing():
124 """SHA1 hashing (C)"""
125 import hashlib
126 with open(__file__, "rb") as f:
127 arg = f.read(5000) * 30
128
129 def compute(s):
130 hashlib.sha1(s).digest()
131 return compute, (arg, )
132
133
134throughput_tasks = [task_pidigits, task_regex]
135for mod in 'bz2', 'hashlib':
136 try:
137 globals()[mod] = __import__(mod)
138 except ImportError:
139 globals()[mod] = None
140
141# For whatever reasons, zlib gives irregular results, so we prefer bz2 or
142# hashlib if available.
143# (NOTE: hashlib releases the GIL from 2.7 and 3.1 onwards)
144if bz2 is not None:
145 throughput_tasks.append(task_compress_bz2)
146elif hashlib is not None:
147 throughput_tasks.append(task_hashing)
148else:
149 throughput_tasks.append(task_compress_zlib)
150
151latency_tasks = throughput_tasks
152
153
154class TimedLoop:
155 def __init__(self, func, args):
156 self.func = func
157 self.args = args
158
159 def __call__(self, start_time, min_duration, end_event, do_yield=False):
160 step = 20
161 niters = 0
162 duration = 0.0
163 _time = time.time
164 _sleep = time.sleep
165 _func = self.func
166 _args = self.args
167 t1 = start_time
168 while True:
169 for i in range(step):
170 _func(*_args)
171 t2 = _time()
172 # If another thread terminated, the current measurement is invalid
173 # => return the previous one.
174 if end_event:
175 return niters, duration
176 niters += step
177 duration = t2 - start_time
178 if duration >= min_duration:
179 end_event.append(None)
180 return niters, duration
181 if t2 - t1 < 0.01:
182 # Minimize interference of measurement on overall runtime
183 step = step * 3 // 2
184 elif do_yield:
185 # OS scheduling of Python threads is sometimes so bad that we
186 # have to force thread switching ourselves, otherwise we get
187 # completely useless results.
188 _sleep(0.0001)
189 t1 = t2
190
191
192def run_throughput_test(func, args, nthreads):
193 assert nthreads >= 1
194
195 # Warm up
196 func(*args)
197
198 results = []
199 loop = TimedLoop(func, args)
200 end_event = []
201
202 if nthreads == 1:
203 # Pure single-threaded performance, without any switching or
204 # synchronization overhead.
205 start_time = time.time()
206 results.append(loop(start_time, THROUGHPUT_DURATION,
207 end_event, do_yield=False))
208 return results
209
210 started = False
211 ready_cond = threading.Condition()
212 start_cond = threading.Condition()
213 ready = []
214
215 def run():
216 with ready_cond:
217 ready.append(None)
218 ready_cond.notify()
219 with start_cond:
220 while not started:
221 start_cond.wait()
222 results.append(loop(start_time, THROUGHPUT_DURATION,
223 end_event, do_yield=True))
224
225 threads = []
226 for i in range(nthreads):
227 threads.append(threading.Thread(target=run))
228 for t in threads:
229 t.setDaemon(True)
230 t.start()
231 # We don't want measurements to include thread startup overhead,
232 # so we arrange for timing to start after all threads are ready.
233 with ready_cond:
234 while len(ready) < nthreads:
235 ready_cond.wait()
236 with start_cond:
237 start_time = time.time()
238 started = True
239 start_cond.notify(nthreads)
240 for t in threads:
241 t.join()
242
243 return results
244
245def run_throughput_tests(max_threads):
246 for task in throughput_tasks:
247 print(task.__doc__)
248 print()
249 func, args = task()
250 nthreads = 1
251 baseline_speed = None
252 while nthreads <= max_threads:
253 results = run_throughput_test(func, args, nthreads)
254 # Taking the max duration rather than average gives pessimistic
255 # results rather than optimistic.
256 speed = sum(r[0] for r in results) / max(r[1] for r in results)
257 print("threads=%d: %d" % (nthreads, speed), end="")
258 if baseline_speed is None:
259 print(" iterations/s.")
260 baseline_speed = speed
261 else:
262 print(" ( %d %%)" % (speed / baseline_speed * 100))
263 nthreads += 1
264 print()
265
266
267LAT_END = "END"
268
269def _sendto(sock, s, addr):
270 sock.sendto(s.encode('ascii'), addr)
271
272def _recv(sock, n):
273 return sock.recv(n).decode('ascii')
274
275def latency_client(addr, nb_pings, interval):
276 sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
277 _time = time.time
278 _sleep = time.sleep
279 def _ping():
280 _sendto(sock, "%r\n" % _time(), addr)
281 # The first ping signals the parent process that we are ready.
282 _ping()
283 # We give the parent a bit of time to notice.
284 _sleep(1.0)
285 for i in range(nb_pings):
286 _sleep(interval)
287 _ping()
288 _sendto(sock, LAT_END + "\n", addr)
289
290def run_latency_client(**kwargs):
291 cmd_line = [sys.executable, '-E', os.path.abspath(__file__)]
292 cmd_line.extend(['--latclient', repr(kwargs)])
293 return subprocess.Popen(cmd_line) #, stdin=subprocess.PIPE,
294 #stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
295
296def run_latency_test(func, args, nthreads):
297 # Create a listening socket to receive the pings. We use UDP which should
298 # be painlessly cross-platform.
299 sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
300 sock.bind(("127.0.0.1", 0))
301 addr = sock.getsockname()
302
303 interval = LATENCY_PING_INTERVAL
304 duration = LATENCY_DURATION
305 nb_pings = int(duration / interval)
306
307 results = []
308 threads = []
309 end_event = []
310 start_cond = threading.Condition()
311 started = False
312 if nthreads > 0:
313 # Warm up
314 func(*args)
315
316 results = []
317 loop = TimedLoop(func, args)
318 ready = []
319 ready_cond = threading.Condition()
320
321 def run():
322 with ready_cond:
323 ready.append(None)
324 ready_cond.notify()
325 with start_cond:
326 while not started:
327 start_cond.wait()
328 loop(start_time, duration * 1.5, end_event, do_yield=False)
329
330 for i in range(nthreads):
331 threads.append(threading.Thread(target=run))
332 for t in threads:
333 t.setDaemon(True)
334 t.start()
335 # Wait for threads to be ready
336 with ready_cond:
337 while len(ready) < nthreads:
338 ready_cond.wait()
339
340 # Run the client and wait for the first ping(s) to arrive before
341 # unblocking the background threads.
342 chunks = []
343 process = run_latency_client(addr=sock.getsockname(),
344 nb_pings=nb_pings, interval=interval)
345 s = _recv(sock, 4096)
346 _time = time.time
347
348 with start_cond:
349 start_time = _time()
350 started = True
351 start_cond.notify(nthreads)
352
353 while LAT_END not in s:
354 s = _recv(sock, 4096)
355 t = _time()
356 chunks.append((t, s))
357
358 # Tell the background threads to stop.
359 end_event.append(None)
360 for t in threads:
361 t.join()
362 process.wait()
363
364 for recv_time, chunk in chunks:
365 # NOTE: it is assumed that a line sent by a client wasn't received
366 # in two chunks because the lines are very small.
367 for line in chunk.splitlines():
368 line = line.strip()
369 if line and line != LAT_END:
370 send_time = eval(line)
371 assert isinstance(send_time, float)
372 results.append((send_time, recv_time))
373
374 return results
375
376def run_latency_tests(max_threads):
377 for task in latency_tasks:
378 print("Background CPU task:", task.__doc__)
379 print()
380 func, args = task()
381 nthreads = 0
382 while nthreads <= max_threads:
383 results = run_latency_test(func, args, nthreads)
384 n = len(results)
385 # We print out milliseconds
386 lats = [1000 * (t2 - t1) for (t1, t2) in results]
387 #print(list(map(int, lats)))
388 avg = sum(lats) / n
389 dev = (sum((x - avg) ** 2 for x in lats) / n) ** 0.5
390 print("CPU threads=%d: %d ms. (std dev: %d ms.)" % (nthreads, avg, dev), end="")
391 print()
392 #print(" [... from %d samples]" % n)
393 nthreads += 1
394 print()
395
396
397def main():
398 usage = "usage: %prog [-h|--help] [options]"
399 parser = OptionParser(usage=usage)
400 parser.add_option("-t", "--throughput",
401 action="store_true", dest="throughput", default=False,
402 help="run throughput tests")
403 parser.add_option("-l", "--latency",
404 action="store_true", dest="latency", default=False,
405 help="run latency tests")
406 parser.add_option("-i", "--interval",
407 action="store", type="int", dest="check_interval", default=None,
408 help="sys.setcheckinterval() value")
409 parser.add_option("-I", "--switch-interval",
410 action="store", type="float", dest="switch_interval", default=None,
411 help="sys.setswitchinterval() value")
412 parser.add_option("-n", "--num-threads",
413 action="store", type="int", dest="nthreads", default=4,
414 help="max number of threads in tests")
415
416 # Hidden option to run the pinging client
417 parser.add_option("", "--latclient",
418 action="store", dest="latclient", default=None,
419 help=SUPPRESS_HELP)
420
421 options, args = parser.parse_args()
422 if args:
423 parser.error("unexpected arguments")
424
425 if options.latclient:
426 kwargs = eval(options.latclient)
427 latency_client(**kwargs)
428 return
429
430 if not options.throughput and not options.latency:
431 options.throughput = options.latency = True
432 if options.check_interval:
433 sys.setcheckinterval(options.check_interval)
434 if options.switch_interval:
435 sys.setswitchinterval(options.switch_interval)
436
437 print("== %s %s (%s) ==" % (
438 platform.python_implementation(),
439 platform.python_version(),
440 platform.python_build()[0],
441 ))
442 # Processor identification often has repeated spaces
443 cpu = ' '.join(platform.processor().split())
444 print("== %s %s on '%s' ==" % (
445 platform.machine(),
446 platform.system(),
447 cpu,
448 ))
449 print()
450
451 if options.throughput:
452 print("--- Throughput ---")
453 print()
454 run_throughput_tests(options.nthreads)
455
456 if options.latency:
457 print("--- Latency ---")
458 print()
459 run_latency_tests(options.nthreads)
460
461if __name__ == "__main__":
462 main()