blob: a0a21fa0b2e1657f268aa460d2d470c67ea57e2a [file] [log] [blame]
Antoine Pitrou1584ae32012-04-09 17:03:32 +02001
2# Various microbenchmarks comparing unicode and byte string performance
3# Please keep this file both 2.x and 3.x compatible!
4
5import timeit
6import itertools
7import operator
8import re
9import sys
10import datetime
11import optparse
12
13VERSION = '2.0'
14
def p(*args):
    """Write the str() of each argument, space-separated, plus a newline.

    Output goes to sys.stdout; this works the same on Python 2.x and 3.x.
    """
    line = ' '.join(map(str, args))
    sys.stdout.write(line + '\n')
17
# Converters from a native ``str`` literal to each string type under test.
# BYTES/UNICODE are the very same objects as bytes_from_str/unicode_from_str,
# so identity checks (``STR is BYTES``) work with either name.
if sys.version_info < (3,):
    # Python 2: the native str already is the 8-bit type.
    bytes_from_str = lambda x: x
    unicode_from_str = lambda x: x.decode('ascii')
else:
    # Python 3: the native str already is the unicode type.
    bytes_from_str = lambda x: x.encode('ascii')
    unicode_from_str = lambda x: x
BYTES = bytes_from_str
UNICODE = unicode_from_str
24
class UnsupportedType(TypeError):
    """Raised when a benchmark cannot run for the requested string type."""
27
28
# Report header: benchmark version, interpreter version, and start time.
p('stringbench v' + VERSION)
p(sys.version)
p(datetime.datetime.now())
32
# Repeat setting; the code that consumes it is not in this part of the file
# (presumably the timing repeat count -- confirm against the runner).
# Alternatives that have been used here: 1 and 7.
REPEAT = 3
36
# Importing this file is refused: everything below runs at module level.
if not __name__ == "__main__":
    raise SystemExit("Must run as main program")
39
# Command-line options; all three are boolean flags that default to None.
parser = optparse.OptionParser()
parser.add_option(
    "-R", "--skip-re",
    help="skip regular expression tests",
    action="store_true",
    dest="skip_re")
parser.add_option(
    "-8", "--8-bit",
    help="only do 8-bit string benchmarks",
    action="store_true",
    dest="bytes_only")
parser.add_option(
    "-u", "--unicode",
    help="only do Unicode string benchmarks",
    action="store_true",
    dest="unicode_only")
50
51
# Pre-built iteration lists so the benchmark loops do not pay for range().
_RANGE_1000, _RANGE_100, _RANGE_10 = (
    list(range(count)) for count in (1000, 100, 10))
55
# Names of every function registered through bench(); used to reject clashes.
dups = {}
def bench(s, group, repeat_count):
    """Return a decorator that tags a function as a benchmark.

    The decorated function gets these attributes:
      comment      -- s, a description of the operation
      is_bench     -- always True
      group        -- group
      repeat_count -- repeat_count

    Raises AssertionError if a function with the same __name__ was already
    registered.  The function itself is returned unchanged otherwise.
    """
    def register(func):
        name = func.__name__
        if name in dups:
            raise AssertionError("Multiple functions with same name: %r" %
                                 (name,))
        dups[name] = 1
        func.is_bench = True
        func.comment = s
        func.group = group
        func.repeat_count = repeat_count
        return func
    return register
69
def uses_re(f):
    """Decorator: flag f as a benchmark that uses the re module.

    Sets ``f.uses_re = True`` and returns f.  Returning f matters: without
    it the decorated name would be rebound to None and the benchmark would
    vanish from the module.  (The flag is presumably what the -R/--skip-re
    option consults; that code is outside this part of the file.)
    """
    f.uses_re = True
    return f
72
73####### 'in' comparisons
74
@bench('"A" in "A"*1000', "early match, single character", 1000)
def in_test_quick_match_single_character(STR):
    """Time 1000 `in` tests: 1-char needle found at the start of 1000 chars.

    Operands are built with STR, the converter for the type under test.
    """
    haystack = STR("A" * 1000)
    needle = STR("A")
    for _ in _RANGE_1000:
        needle in haystack
81
@bench('"B" in "A"*1000', "no match, single character", 1000)
def in_test_no_match_single_character(STR):
    """Time 1000 `in` tests: 1-char needle absent from a 1000-char haystack.

    Operands are built with STR, the converter for the type under test.
    """
    haystack = STR("A" * 1000)
    needle = STR("B")
    for _ in _RANGE_1000:
        needle in haystack
88
89
@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
def in_test_quick_match_two_characters(STR):
    """Time 1000 `in` tests: 2-char needle found at the start of "AB"*1000.

    Operands are built with STR, the converter for the type under test.
    """
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    for _ in _RANGE_1000:
        needle in haystack
96
@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
def in_test_no_match_two_character(STR):
    """Time 1000 `in` tests: 2-char needle "BC" absent from "AB"*1000.

    Operands are built with STR, the converter for the type under test.
    """
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    for _ in _RANGE_1000:
        needle in haystack
103
@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
def in_test_slow_match_two_characters(STR):
    """Time 1000 `in` tests: "BC" occurs only at the very end of the haystack.

    Operands are built with STR, the converter for the type under test.
    """
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    for _ in _RANGE_1000:
        needle in haystack
110
@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def in_test_slow_match_100_characters(STR):
    """Time 100 `in` tests with a 100-char needle that matches only at the end.

    The haystack is 300 copies of stem+"D" followed by the needle stem+"E".
    Operands are built with STR, the converter for the type under test.
    """
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    haystack = (stem+miss_tail)*300 + stem+hit_tail
    needle = stem+hit_tail
    for _ in _RANGE_100:
        needle in haystack
121
122# Try with regex
@uses_re
@bench('s="ABC"*33; re.compile(s+"E").search((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
    """Time 100 regex searches for a 100-char pattern matching only at the end.

    Regex counterpart of the `in` benchmark with the same operands: the
    haystack is 300 copies of stem+"D" followed by stem+"E", and the compiled
    pattern is stem+"E".  The bench description names the pattern actually
    compiled (s+"E").  Operands are built with STR, the converter for the
    type under test.
    """
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    haystack = (stem+miss_tail)*300 + stem+hit_tail
    needle = stem+hit_tail
    # Compile once and bind the method so only the search itself is timed.
    search = re.compile(needle).search
    for _ in _RANGE_100:
        search(haystack)
136
137
138#### same tests as 'in' but use 'find'
139
@bench('("A"*1000).find("A")', "early match, single character", 1000)
def find_test_quick_match_single_character(STR):
    """Time 1000 find() calls: 1-char needle found at the start of 1000 chars.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("A" * 1000)
    needle = STR("A")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
147
@bench('("A"*1000).find("B")', "no match, single character", 1000)
def find_test_no_match_single_character(STR):
    """Time 1000 find() calls: 1-char needle absent from a 1000-char haystack.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("A" * 1000)
    needle = STR("B")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
155
156
@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
def find_test_quick_match_two_characters(STR):
    """Time 1000 find() calls: "AB" found at the start of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
164
@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
def find_test_no_match_two_character(STR):
    """Time 1000 find() calls: "BC" absent from "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
172
@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
def find_test_no_match_two_character_bis(STR):
    """Time 1000 find() calls: "CA" absent from "AB"*1000.

    Variant of the "BC" case whose needle starts with the missing character.
    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
180
@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
    """Time 1000 find() calls: "BC" occurs only at the end of the haystack.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
188
@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
def find_test_slow_match_two_characters_bis(STR):
    """Time 1000 find() calls: "CA" occurs only at the end of the haystack.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 300+"CA")
    needle = STR("CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
196
@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
    """Time 100 find() calls with a 100-char needle matching only at the end.

    The haystack is 500 copies of stem+"D" followed by the needle stem+"E".
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    haystack = (stem+miss_tail)*500 + stem+hit_tail
    needle = stem+hit_tail
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
208
@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters_bis(STR):
    """Time 100 find() calls; the distinguishing "E" leads the 100-char needle.

    The haystack is 500 copies of stem+"D" followed by the needle "E"+stem.
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    haystack = (stem+filler)*500 + marker+stem
    needle = marker+stem
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
220
221
222#### Same tests for 'rfind'
223
@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
def rfind_test_quick_match_single_character(STR):
    """Time 1000 rfind() calls: 1-char needle present at the end of 1000 chars.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("A" * 1000)
    needle = STR("A")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
231
@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
def rfind_test_no_match_single_character(STR):
    """Time 1000 rfind() calls: 1-char needle absent from a 1000-char haystack.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("A" * 1000)
    needle = STR("B")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
239
240
@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
def rfind_test_quick_match_two_characters(STR):
    """Time 1000 rfind() calls: "AB" present at the end of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
248
@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
def rfind_test_no_match_two_character(STR):
    """Time 1000 rfind() calls: "BC" absent from "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
256
@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
def rfind_test_no_match_two_character_bis(STR):
    """Time 1000 rfind() calls: "CA" absent from "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
264
@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters(STR):
    """Time 1000 rfind() calls: "CA" occurs only at the very start.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
272
@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters_bis(STR):
    """Time 1000 rfind() calls: "BC" occurs only at the very start.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("BC" + "AB" * 300)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
280
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters(STR):
    """Time 100 rfind() calls with a 100-char needle found only at the start.

    The haystack is the needle "E"+stem followed by 500 copies of "D"+stem.
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    haystack = marker+stem + (filler+stem)*500
    needle = marker+stem
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
292
@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters_bis(STR):
    """Time 100 rfind() calls; the distinguishing "E" ends the 100-char needle.

    The haystack is the needle stem+"E" followed by 500 copies of "D"+stem.
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    haystack = stem+marker + (filler+stem)*500
    needle = stem+marker
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
304
305
306#### Now with index.
307# Skip the ones which fail because that would include exception overhead.
308
@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
    """Time 1000 index() calls: 1-char needle found at the start of 1000 chars.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("A" * 1000)
    needle = STR("A")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
316
@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
    """Time 1000 index() calls: "AB" found at the start of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
324
@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
def index_test_slow_match_two_characters(STR):
    """Time 1000 index() calls: "BC" occurs only at the end of the haystack.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
332
@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
       "late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
    """Time 100 index() calls with a 100-char needle matching only at the end.

    The haystack is 500 copies of stem+"D" followed by the needle stem+"E".
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    haystack = (stem+miss_tail)*500 + stem+hit_tail
    needle = stem+hit_tail
    index = haystack.index
    for _ in _RANGE_100:
        index(needle)
344
345
346#### Same for rindex
347
@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
def rindex_test_quick_match_single_character(STR):
    """Time 1000 rindex() calls: 1-char needle present at the end of 1000 chars.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("A" * 1000)
    needle = STR("A")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
355
@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
def rindex_test_quick_match_two_characters(STR):
    """Time 1000 rindex() calls: "AB" present at the end of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
363
@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
def rindex_test_slow_match_two_characters(STR):
    """Time 1000 rindex() calls: "CA" occurs only at the very start.

    Operands are built with STR; the bound method is looked up once.
    """
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
371
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
       "late match, 100 characters", 100)
def rindex_test_slow_match_100_characters(STR):
    """Time 100 rindex() calls with a 100-char needle found only at the start.

    The haystack is the needle "E"+stem followed by 500 copies of "D"+stem.
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    haystack = marker + stem + (filler+stem)*500
    needle = marker + stem
    rindex = haystack.rindex
    for _ in _RANGE_100:
        rindex(needle)
383
384
385#### Same for partition
386
@bench('("A"*1000).partition("A")', "early match, single character", 1000)
def partition_test_quick_match_single_character(STR):
    """Time 1000 partition() calls: 1-char separator found at the start.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("A")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
394
@bench('("A"*1000).partition("B")', "no match, single character", 1000)
def partition_test_no_match_single_character(STR):
    """Time 1000 partition() calls: 1-char separator absent from the text.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("B")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
402
403
@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
def partition_test_quick_match_two_characters(STR):
    """Time 1000 partition() calls: "AB" found at the start of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("AB")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
411
@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
def partition_test_no_match_two_character(STR):
    """Time 1000 partition() calls: "BC" absent from "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
419
@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
def partition_test_slow_match_two_characters(STR):
    """Time 1000 partition() calls: "BC" occurs only at the end of the text.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 300+"C")
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
427
@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
       "late match, 100 characters", 100)
def partition_test_slow_match_100_characters(STR):
    """Time 100 partition() calls; 100-char separator matches only at the end.

    The text is 500 copies of stem+"D" followed by the separator stem+"E".
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    text = (stem+miss_tail)*500 + stem+hit_tail
    sep = stem+hit_tail
    partition = text.partition
    for _ in _RANGE_100:
        partition(sep)
439
440
441#### Same for rpartition
442
@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
def rpartition_test_quick_match_single_character(STR):
    """Time 1000 rpartition() calls: 1-char separator present at the end.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("A")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
450
@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
def rpartition_test_no_match_single_character(STR):
    """Time 1000 rpartition() calls: 1-char separator absent from the text.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("B")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
458
459
@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
def rpartition_test_quick_match_two_characters(STR):
    """Time 1000 rpartition() calls: "AB" present at the end of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("AB")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
467
@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
def rpartition_test_no_match_two_character(STR):
    """Time 1000 rpartition() calls: "BC" absent from "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("BC")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
475
@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
def rpartition_test_slow_match_two_characters(STR):
    """Time 1000 rpartition() calls: "CA" occurs only at the very start.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("C" + "AB" * 300)
    sep = STR("CA")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
483
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
       "late match, 100 characters", 100)
def rpartition_test_slow_match_100_characters(STR):
    """Time 100 rpartition() calls; 100-char separator found only at the start.

    The text is the separator "E"+stem followed by 500 copies of "D"+stem.
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    text = marker + stem + (filler+stem)*500
    sep = marker + stem
    rpartition = text.rpartition
    for _ in _RANGE_100:
        rpartition(sep)
495
496
497#### Same for split(s, 1)
498
@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
def split_test_quick_match_single_character(STR):
    """Time 1000 split(sep, 1) calls: 1-char separator found at the start.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("A")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
506
@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
def split_test_no_match_single_character(STR):
    """Time 1000 split(sep, 1) calls: 1-char separator absent from the text.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("B")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
514
515
@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
def split_test_quick_match_two_characters(STR):
    """Time 1000 split(sep, 1) calls: "AB" found at the start of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("AB")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
523
@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
def split_test_no_match_two_character(STR):
    """Time 1000 split(sep, 1) calls: "BC" absent from "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("BC")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
531
@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
def split_test_slow_match_two_characters(STR):
    """Time 1000 split(sep, 1) calls: "BC" occurs only at the end of the text.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 300+"C")
    sep = STR("BC")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
539
@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
       "late match, 100 characters", 100)
def split_test_slow_match_100_characters(STR):
    """Time 100 split(sep, 1) calls; 100-char separator matches only at the end.

    The text is 500 copies of stem+"D" followed by the separator stem+"E".
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    text = (stem+miss_tail)*500 + stem+hit_tail
    sep = stem+hit_tail
    split = text.split
    for _ in _RANGE_100:
        split(sep, 1)
551
552
553#### Same for rsplit(s, 1)
554
@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
def rsplit_test_quick_match_single_character(STR):
    """Time 1000 rsplit(sep, 1) calls: 1-char separator present at the end.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("A")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
562
@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
def rsplit_test_no_match_single_character(STR):
    """Time 1000 rsplit(sep, 1) calls: 1-char separator absent from the text.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("A" * 1000)
    sep = STR("B")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
570
571
@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
def rsplit_test_quick_match_two_characters(STR):
    """Time 1000 rsplit(sep, 1) calls: "AB" present at the end of "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("AB")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
579
@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
def rsplit_test_no_match_two_character(STR):
    """Time 1000 rsplit(sep, 1) calls: "BC" absent from "AB"*1000.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("AB" * 1000)
    sep = STR("BC")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
587
@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
def rsplit_test_slow_match_two_characters(STR):
    """Time 1000 rsplit(sep, 1) calls: "CA" occurs only at the very start.

    Operands are built with STR; the bound method is looked up once.
    """
    text = STR("C" + "AB" * 300)
    sep = STR("CA")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
595
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
       "late match, 100 characters", 100)
def rsplit_test_slow_match_100_characters(STR):
    """Time 100 rsplit(sep, 1) calls; 100-char separator found only at the start.

    The text is the separator "E"+stem followed by 500 copies of "D"+stem.
    Operands are built with STR; the bound method is looked up once.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    text = marker + stem + (filler+stem)*500
    sep = marker + stem
    rsplit = text.rsplit
    for _ in _RANGE_100:
        rsplit(sep, 1)
607
608
609#### Benchmark the operator-based methods
610
@bench('"A"*10', "repeat 1 character 10 times", 1000)
def repeat_single_10_times(STR):
    """Time 1000 repetitions of `unit * 10` for a 1-char STR-typed string."""
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 10
616
@bench('"A"*1000', "repeat 1 character 1000 times", 1000)
def repeat_single_1000_times(STR):
    """Time 1000 repetitions of `unit * 1000` for a 1-char STR-typed string."""
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 1000
622
@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
def repeat_5_10_times(STR):
    """Time 1000 repetitions of `unit * 10` for a 5-char STR-typed string."""
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 10
628
@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
def repeat_5_1000_times(STR):
    """Time 1000 repetitions of `unit * 1000` for a 5-char STR-typed string."""
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 1000
634
635# + for concat
636
@bench('"Andrew"+"Dalke"', "concat two strings", 1000)
def concat_two_strings(STR):
    """Time 1000 `left + right` concatenations of two short STR-typed strings."""
    left = STR("Andrew")
    right = STR("Dalke")
    for _ in _RANGE_1000:
        left+right
643
@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
       1000)
def concat_many_strings(STR):
    """Time 1000 evaluations of one chained `+` over 20 STR-typed words.

    The words stay in separate locals so the loop body is a single
    expression with 19 binary additions.
    """
    w1 = STR('TIXSGYNREDCVBHJ')
    w2 = STR('PUMTLXBZVDO')
    w3 = STR('FVZNJ')
    w4 = STR('OGDXUW')
    w5 = STR('WEIMRNCOYVGHKB')
    w6 = STR('FCQTNMXPUZH')
    w7 = STR('TICZJYRLBNVUEAK')
    w8 = STR('REYB')
    w9 = STR('PWUOQ')
    w10 = STR('EQHCMKBS')
    w11 = STR('AEVDFOH')
    w12 = STR('IFHVD')
    w13 = STR('JGTCNLXWOHQ')
    w14 = STR('ITSKEPYLROZAWXF')
    w15 = STR('THEK')
    w16 = STR('GHPZFBUYCKMNJIT')
    w17 = STR('JMUZ')
    w18 = STR('WLZQMTB')
    w19 = STR('KPADCBW')
    w20 = STR('TNJHZQAGBU')
    for _ in _RANGE_1000:
        (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 +
         w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20)
670
671
672#### Benchmark join
673
def get_bytes_yielding_seq(STR, arg):
    """Return STR(arg) for use as the iterable argument of join().

    Raises UnsupportedType when STR is BYTES on Python 3.  (Presumably
    because iterating a bytes object there does not yield bytes items --
    inferred from the function name, not shown in this file.)
    """
    running_py3 = sys.version_info >= (3,)
    if running_py3 and STR is BYTES:
        raise UnsupportedType
    return STR(arg)
678
@bench('"A".join("")',
       "join empty string, with 1 character sep", 100)
def join_empty_single(STR):
    """Time 100 join() calls of a 1-char separator over an empty string.

    May raise UnsupportedType via get_bytes_yielding_seq().
    """
    sep = STR("A")
    items = get_bytes_yielding_seq(STR, "")
    join = sep.join
    for _ in _RANGE_100:
        join(items)
687
@bench('"ABCDE".join("")',
       "join empty string, with 5 character sep", 100)
def join_empty_5(STR):
    """Time 100 join() calls of a 5-char separator over an empty string.

    May raise UnsupportedType via get_bytes_yielding_seq().
    """
    sep = STR("ABCDE")
    items = get_bytes_yielding_seq(STR, "")
    join = sep.join
    for _ in _RANGE_100:
        join(items)
696
@bench('"A".join("ABC..Z")',
       "join string with 26 characters, with 1 character sep", 1000)
def join_alphabet_single(STR):
    """Time 1000 join() calls of a 1-char separator over a 26-char string.

    May raise UnsupportedType via get_bytes_yielding_seq().
    """
    sep = STR("A")
    items = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
705
@bench('"ABCDE".join("ABC..Z")',
       "join string with 26 characters, with 5 character sep", 1000)
def join_alphabet_5(STR):
    """Time 1000 join() calls of a 5-char separator over a 26-char string.

    May raise UnsupportedType via get_bytes_yielding_seq().
    """
    sep = STR("ABCDE")
    items = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
714
@bench('"A".join(list("ABC..Z"))',
       "join list of 26 characters, with 1 character sep", 1000)
def join_alphabet_list_single(STR):
    """Time 1000 join() calls of a 1-char separator over a list of 26 items.

    Each list item is a 1-char string built with STR.
    """
    sep = STR("A")
    items = [STR(ch) for ch in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
723
@bench('"ABCDE".join(list("ABC..Z"))',
       "join list of 26 characters, with 5 character sep", 1000)
def join_alphabet_list_five(STR):
    """Time 1000 join() calls of a 5-char separator over a list of 26 items.

    Each list item is a 1-char string built with STR.
    """
    sep = STR("ABCDE")
    items = [STR(ch) for ch in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
732
@bench('"A".join(["Bob"]*100))',
       "join list of 100 words, with 1 character sep", 1000)
def join_100_words_single(STR):
    """Time 1000 join() calls of a 1-char separator over 100 copies of "Bob".

    The list holds 100 references to one STR-typed string.
    """
    sep = STR("A")
    words = [STR("Bob")]*100
    join = sep.join
    for _ in _RANGE_1000:
        join(words)
741
@bench('"ABCDE".join(["Bob"]*100))',
       "join list of 100 words, with 5 character sep", 1000)
def join_100_words_5(STR):
    """Time 1000 join() calls of a 5-char separator over 100 copies of "Bob".

    The list holds 100 references to one STR-typed string.
    """
    sep = STR("ABCDE")
    words = [STR("Bob")]*100
    join = sep.join
    for _ in _RANGE_1000:
        join(words)
750
751#### split tests
752
@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
def whitespace_split(STR):
    """Time 1000 argument-less split() calls on a short STR-typed sentence."""
    text = STR("Here are some words. "*2)
    split = text.split
    for _ in _RANGE_1000:
        split()
759
@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
def whitespace_rsplit(STR):
    """Time 1000 argument-less rsplit() calls on a short STR-typed sentence."""
    text = STR("Here are some words. "*2)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit()
766
@bench('("Here are some words. "*2).split(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_split_1(STR):
    """Time 1000 split(None, 1) calls on a short STR-typed sentence.

    None is held in a local so the loop passes it by local-name lookup.
    """
    text = STR("Here are some words. "*2)
    split = text.split
    no_sep = None
    for _ in _RANGE_1000:
        split(no_sep, 1)
775
@bench('("Here are some words. "*2).rsplit(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_rsplit_1(STR):
    """Time 1000 rsplit(None, 1) calls on a short STR-typed sentence.

    None is held in a local so the loop passes it by local-name lookup.
    """
    text = STR("Here are some words. "*2)
    rsplit = text.rsplit
    no_sep = None
    for _ in _RANGE_1000:
        rsplit(no_sep, 1)
784
@bench('("Here are some words. "*2).partition(" ")',
       "split 1 whitespace", 1000)
def whitespace_partition(STR):
    """Time 1000 partition(" ") calls on a short STR-typed sentence."""
    space = STR(" ")
    text = STR("Here are some words. "*2)
    partition = text.partition
    for _ in _RANGE_1000:
        partition(space)
793
@bench('("Here are some words. "*2).rpartition(" ")',
       "split 1 whitespace", 1000)
def whitespace_rpartition(STR):
    """Time 1000 rpartition(" ") calls on a short STR-typed sentence."""
    space = STR(" ")
    text = STR("Here are some words. "*2)
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(space)
802
803human_text = """\
804Python is a dynamic object-oriented programming language that can be
805used for many kinds of software development. It offers strong support
806for integration with other languages and tools, comes with extensive
807standard libraries, and can be learned in a few days. Many Python
808programmers report substantial productivity gains and feel the language
809encourages the development of higher quality, more maintainable code.
810
811Python runs on Windows, Linux/Unix, Mac OS X, OS/2, Amiga, Palm
812Handhelds, and Nokia mobile phones. Python has also been ported to the
813Java and .NET virtual machines.
814
815Python is distributed under an OSI-approved open source license that
816makes it free to use, even for commercial products.
817"""*25
# Convert the sample text once so benchmarks do not pay for the conversion.
human_text_unicode = unicode_from_str(human_text)
human_text_bytes = bytes_from_str(human_text)
def _get_human_text(STR):
    """Return the pre-converted human_text matching the converter STR.

    STR must be the UNICODE or BYTES object itself (identity comparison);
    anything else raises AssertionError.
    """
    for converter, text in ((UNICODE, human_text_unicode),
                            (BYTES, human_text_bytes)):
        if STR is converter:
            return text
    raise AssertionError
826
@bench('human_text.split()', "split whitespace (huge)", 10)
def whitespace_split_huge(STR):
    """Time 10 argument-less split() calls on the large human_text sample."""
    text = _get_human_text(STR)
    split = text.split
    for _ in _RANGE_10:
        split()
833
@bench('human_text.rsplit()', "split whitespace (huge)", 10)
def whitespace_rsplit_huge(STR):
    """Time 10 argument-less rsplit() calls on the large human_text sample."""
    text = _get_human_text(STR)
    rsplit = text.rsplit
    for _ in _RANGE_10:
        rsplit()
840
841
842
@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
def newlines_split(STR):
    """Time 1000 split("\\n") calls on a short four-line STR-typed text."""
    text = STR("this\nis\na\ntest\n")
    split = text.split
    newline = STR("\n")
    for _ in _RANGE_1000:
        split(newline)
850
851
@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
def newlines_rsplit(STR):
    """Time 1000 rsplit("\\n") calls on a short four-line STR-typed text."""
    text = STR("this\nis\na\ntest\n")
    rsplit = text.rsplit
    newline = STR("\n")
    for _ in _RANGE_1000:
        rsplit(newline)
859
@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
def newlines_splitlines(STR):
    """Time 1000 splitlines() calls on a short four-line STR-typed text."""
    text = STR("this\nis\na\ntest\n")
    splitlines = text.splitlines
    for _ in _RANGE_1000:
        splitlines()
866
867## split text with 2000 newlines
868
def _make_2000_lines():
    """Build a reproducible text of 2000 newline-terminated lines.

    A private random.Random seeded with 100 drives every choice, so the
    result is the same on each call within one interpreter.  Each line is a
    slice, 5 to 65 characters long, of a 384-character alphabet made from
    chr(32)..chr(127) with some positions blanked to spaces.
    """
    import random
    rng = random.Random(100)
    alphabet_chars = [chr(code) for code in range(32, 128)]
    # Blank out positions at random strides of 0..8; a stride of 0 simply
    # rewrites the same position on the next pass.
    pos = 0
    while pos < len(alphabet_chars):
        alphabet_chars[pos] = " "
        pos += rng.randrange(9)
    # Four copies, so any start below 96 leaves room for a 65-char slice.
    alphabet = "".join(alphabet_chars) * 4
    lines = []
    for _ in range(2000):
        begin = rng.randrange(96)
        length = rng.randint(5, 65)
        lines.append(alphabet[begin:begin + length])
    return "\n".join(lines) + "\n"
885
# Build the 2000-line sample once, then convert it once per string type.
_text_with_2000_lines = _make_2000_lines()
_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)
_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
def _get_2000_lines(STR):
    """Return the pre-converted 2000-line text matching the converter STR.

    STR must be the UNICODE or BYTES object itself (identity comparison);
    anything else raises AssertionError.
    """
    for converter, text in ((UNICODE, _text_with_2000_lines_unicode),
                            (BYTES, _text_with_2000_lines_bytes)):
        if STR is converter:
            return text
    raise AssertionError
895
896
@bench('"...text...".split("\\n")', "split 2000 newlines", 10)
def newlines_split_2000(STR):
    """Time 10 split("\\n") calls on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    split = text.split
    newline = STR("\n")
    for _ in _RANGE_10:
        split(newline)
904
@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
def newlines_rsplit_2000(STR):
    """Time 10 rsplit("\\n") calls on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    rsplit = text.rsplit
    newline = STR("\n")
    for _ in _RANGE_10:
        rsplit(newline)
912
@bench('"...text...".splitlines()', "split 2000 newlines", 10)
def newlines_splitlines_2000(STR):
    """Time 10 splitlines() calls on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    splitlines = text.splitlines
    for _ in _RANGE_10:
        splitlines()
919
920
921## split text on "--" characters
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
    "split on multicharacter separator (small)", 1000)
def split_multichar_sep_small(STR):
    """Time 1000 split("--") calls on a short phrase joined with "--"."""
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    split = text.split
    sep = STR("--")
    for _ in _RANGE_1000:
        split(sep)
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
    "split on multicharacter separator (small)", 1000)
def rsplit_multichar_sep_small(STR):
    """Time 1000 rsplit("--") calls on a short phrase joined with "--"."""
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    rsplit = text.rsplit
    sep = STR("--")
    for _ in _RANGE_1000:
        rsplit(sep)
940
941## split dna text on "ACTAT" characters
@bench('dna.split("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def split_multichar_sep_dna(STR):
    """Time 10 split("ACTAT") calls on the DNA sample from _get_dna()."""
    seq = _get_dna(STR)
    split = seq.split
    sep = STR("ACTAT")
    for _ in _RANGE_10:
        split(sep)
950
@bench('dna.rsplit("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def rsplit_multichar_sep_dna(STR):
    """Time 10 rsplit("ACTAT") calls on the DNA sample from _get_dna()."""
    seq = _get_dna(STR)
    rsplit = seq.rsplit
    sep = STR("ACTAT")
    for _ in _RANGE_10:
        rsplit(sep)
959
960
961
962## split with limits
963
# One tab-separated record with nine fields (eight tabs).
GFF3_example = "\t".join((
    "I",
    "Genomic_canonical",
    "region",
    "357208",
    "396183",
    ".",
    "+",
    ".",
    "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119",
))
967
@bench('GFF3_example.split("\\t")', "tab split", 1000)
def tab_split_no_limit(STR):
    """Time 1000 split("\\t") calls, without a limit, on the GFF3 record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    split = record.split
    for _ in _RANGE_1000:
        split(tab)
975
@bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
def tab_split_limit(STR):
    """Time 1000 split("\\t", 8) calls on the GFF3 record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    split = record.split
    for _ in _RANGE_1000:
        split(tab, 8)
983
@bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
def tab_rsplit_no_limit(STR):
    """Time 1000 rsplit("\\t") calls, without a limit, on the GFF3 record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab)
991
@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
def tab_rsplit_limit(STR):
    """Time 1000 rsplit("\\t", 8) calls on the GFF3 record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab, 8)
999
1000#### Count characters
1001
@bench('...text.with.2000.newlines.count("\\n")',
       "count newlines", 10)
def count_newlines(STR):
    """Time 10 count("\\n") calls on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    count = text.count
    newline = STR("\n")
    for _ in _RANGE_10:
        count(newline)
1010
1011# Orchid sequences concatenated, from Biopython
1012_dna = """
1013CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
1014AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
1015AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
1016TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
1017AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
1018TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
1019CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
1020TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
1021GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
1022TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
1023GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
1024ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
1025CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
1026ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
1027ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
1028TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
1029CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
1030GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
1031ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
1032ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
1033ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
1034GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
1035TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
1036TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
1037TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
1038GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
1039GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
1040AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
1041GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
1042TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
1043CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
1044TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
1045TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
1046AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
1047GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
1048GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
1049CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
1050GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
1051TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
1052ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
1053GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
1054AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
1055AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
1056ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
1057GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
1058GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
1059AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
1060GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
1061ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
1062GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
1063"""
# Collapse the multi-line literal into a single sequence and repeat it 25
# times, then pre-build one copy per string type so that the benchmarks do
# not pay for the conversion.
_dna = "".join(_dna.splitlines()) * 25
_dna_bytes = bytes_from_str(_dna)
_dna_unicode = unicode_from_str(_dna)


def _get_dna(STR):
    """Return the prebuilt DNA sequence for the requested string type.

    STR must be the module's BYTES or UNICODE converter (compared by
    identity); anything else raises AssertionError.
    """
    if STR is BYTES:
        return _dna_bytes
    if STR is UNICODE:
        return _dna_unicode
    raise AssertionError
1075
@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
def count_aact(STR):
    """Call count("AACT") on the shared DNA sequence ten times."""
    needle = STR("AACT")
    dna = _get_dna(STR)
    # Bind the method once so the loop body is only the call itself.
    count = dna.count
    for _ in _RANGE_10:
        count(needle)
1083
1084##### startswith and endswith
1085
@bench('"Andrew".startswith("A")', 'startswith single character', 1000)
def startswith_single(STR):
    """Call startswith() with a matching one-character prefix, 1000 times."""
    prefix = STR("A")
    # Bind the method once so the loop body is only the call itself.
    startswith = STR("Andrew").startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1093
@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
       1000)
def startswith_multiple(STR):
    """Call startswith() with a prefix equal to the whole string, 1000 times."""
    prefix = STR("Andrew")
    # Bind the method once so the loop body is only the call itself.
    startswith = STR("Andrew").startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1102
@bench('"Andrew".startswith("Anders")',
       'startswith multiple characters - not!', 1000)
def startswith_multiple_not(STR):
    """Call startswith() with a same-length non-matching prefix, 1000 times."""
    prefix = STR("Anders")
    # Bind the method once so the loop body is only the call itself.
    startswith = STR("Andrew").startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1111
1112
1113# endswith
1114
@bench('"Andrew".endswith("w")', 'endswith single character', 1000)
def endswith_single(STR):
    """Call endswith() with a matching one-character suffix, 1000 times."""
    suffix = STR("w")
    # Bind the method once so the loop body is only the call itself.
    endswith = STR("Andrew").endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1122
@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
def endswith_multiple(STR):
    """Call endswith() with a suffix equal to the whole string, 1000 times."""
    suffix = STR("Andrew")
    # Bind the method once so the loop body is only the call itself.
    endswith = STR("Andrew").endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1130
@bench('"Andrew".endswith("Anders")',
       'endswith multiple characters - not!', 1000)
def endswith_multiple_not(STR):
    """Call endswith() with a same-length non-matching suffix, 1000 times."""
    suffix = STR("Anders")
    # Bind the method once so the loop body is only the call itself.
    endswith = STR("Andrew").endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1139
1140#### Strip
1141
@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_right(STR):
    """Call strip() on a string with a trailing newline, 1000 times."""
    # Bind the method once so the loop body is only the call itself.
    strip = STR("Hello!\n").strip
    for _ in _RANGE_1000:
        strip()
1148
@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
def terminal_newline_rstrip(STR):
    """Call rstrip() on a string with a trailing newline, 1000 times."""
    # Bind the method once so the loop body is only the call itself.
    rstrip = STR("Hello!\n").rstrip
    for _ in _RANGE_1000:
        rstrip()
1155
@bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_left(STR):
    """Call strip() on a string with a leading newline, 1000 times."""
    # Bind the method once so the loop body is only the call itself.
    strip = STR("\nHello!").strip
    for _ in _RANGE_1000:
        strip()
1162
@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_both(STR):
    """Call strip() on a string with newlines at both ends, 1000 times."""
    # Bind the method once so the loop body is only the call itself.
    strip = STR("\nHello!\n").strip
    for _ in _RANGE_1000:
        strip()
1169
# The label previously read ".rstrip()" although the body times lstrip();
# it now names the method that is actually measured.
@bench('"\\nHello!".lstrip()', 'strip terminal newline', 1000)
def terminal_newline_lstrip(STR):
    """Call lstrip() on a string with a leading newline, 1000 times."""
    s = STR("\nHello!")
    # Bind the method once so the loop body is only the call itself.
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1176
@bench('s="Hello!\\n"; s[:-1] if s[-1:]=="\\n" else s',
       'strip terminal newline', 1000)
def terminal_newline_if_else(STR):
    """Strip a trailing newline with a conditional slice, 1000 times.

    The last character is taken with the slice ``s[-1:]`` rather than the
    index ``s[-1]``: indexing a Python 3 bytes object yields an int, which
    never equals the one-byte NL string, so the bytes run would always
    take the ``else`` branch and never perform the ``s[:-1]`` slice.
    A slice has the same type as ``s`` for both bytes and unicode.
    """
    s = STR("Hello!\n")
    NL = STR("\n")
    for x in _RANGE_1000:
        s[:-1] if (s[-1:] == NL) else s
1184
1185
1186# Strip multiple spaces or tabs
1187
@bench('"Hello\\t \\t".strip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_strip(STR):
    """Call strip() on a string ending in tabs and spaces, 1000 times."""
    # The whitespace must come last.  The subject used to be
    # "Hello\t \t!", whose trailing "!" left strip() with nothing to
    # remove, contrary to the label and to the rstrip/lstrip siblings.
    s = STR("Hello!\t \t")
    # Bind the method once so the loop body is only the call itself.
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()
1194
@bench('"Hello\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_rstrip(STR):
    """Call rstrip() on a string ending in tabs and spaces, 1000 times."""
    # Bind the method once so the loop body is only the call itself.
    rstrip = STR("Hello!\t \t").rstrip
    for _ in _RANGE_1000:
        rstrip()
1201
# The label previously read ".rstrip()" although the body times lstrip();
# it now names the method that is actually measured.
@bench('"\\t \\tHello".lstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_lstrip(STR):
    """Call lstrip() on a string starting with tabs and spaces, 1000 times."""
    s = STR("\t \tHello!")
    # Bind the method once so the loop body is only the call itself.
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1208
1209
1210#### replace
@bench('"This is a test".replace(" ", "\\t")', 'replace single character',
       1000)
def replace_single_character(STR):
    """Replace every space with a tab in a short sentence, 1000 times."""
    old = STR(" ")
    new = STR("\t")
    # Bind the method once so the loop body is only the call itself.
    replace = STR("This is a test!").replace
    for _ in _RANGE_1000:
        replace(old, new)
1220
@bench('re.sub(" ", "\\t", "This is a test")', 'replace single character',
       1000)
def replace_single_character_re(STR):
    """Replace every space with a tab using a compiled regex, 1000 times."""
    s = STR("This is a test!")
    pat = re.compile(STR(" "))
    to_str = STR("\t")
    # Bind the method once so the loop body is only the call itself.
    pat_sub = pat.sub
    for x in _RANGE_1000:
        pat_sub(to_str, s)

# Mark as regex-based so that --skip-re can exclude it.  The attribute is
# set directly instead of via "@uses_re": that helper sets the attribute but
# does not return the function, so as a decorator it would rebind this name
# to None and the benchmark would never be collected.
replace_single_character_re.uses_re = True
1231
@bench('"...text.with.2000.lines...replace("\\n", " ")',
       'replace single character, big string', 10)
def replace_single_character_big(STR):
    """Replace every newline with a space in the 2000-line text, 10 times."""
    old = STR("\n")
    new = STR(" ")
    # Bind the method once so the loop body is only the call itself.
    replace = _get_2000_lines(STR).replace
    for _ in _RANGE_10:
        replace(old, new)
1241
@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
       'replace single character, big string', 10)
def replace_single_character_big_re(STR):
    """Replace newlines with spaces in the 2000-line text via regex, 10 times."""
    s = _get_2000_lines(STR)
    pat = re.compile(STR("\n"))
    to_str = STR(" ")
    # Bind the method once so the loop body is only the call itself.
    pat_sub = pat.sub
    for x in _RANGE_10:
        pat_sub(to_str, s)

# Mark as regex-based so that --skip-re can exclude it.  The attribute is
# set directly instead of via "@uses_re": that helper sets the attribute but
# does not return the function, so as a decorator it would rebind this name
# to None and the benchmark would never be collected.
replace_single_character_big_re.uses_re = True
1252
1253
@bench('dna.replace("ATC", "ATT")',
       'replace multiple characters, dna', 10)
def replace_multiple_characters_dna(STR):
    """Replace "ATC" with "ATT" throughout the DNA sequence, 10 times."""
    old = STR("ATC")
    new = STR("ATT")
    # Bind the method once so the loop body is only the call itself.
    replace = _get_dna(STR).replace
    for _ in _RANGE_10:
        replace(old, new)
1263
# The replacement is longer than the match, so the result grows.
@bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
       'replace and expand multiple characters, big string', 10)
def replace_multiple_character_big(STR):
    """Replace "\\n" with "\\r\\n" in the 2000-line text, 10 times."""
    old = STR("\n")
    new = STR("\r\n")
    # Bind the method once so the loop body is only the call itself.
    replace = _get_2000_lines(STR).replace
    for _ in _RANGE_10:
        replace(old, new)
1274
1275
# The replacement is empty, so the result shrinks.
@bench('"When shall we three meet again?".replace("ee", "")',
       'replace/remove multiple characters', 1000)
def replace_multiple_character_remove(STR):
    """Delete every "ee" from a short sentence, 1000 times."""
    old = STR("ee")
    new = STR("")
    # Bind the method once so the loop body is only the call itself.
    replace = STR("When shall we three meet again?").replace
    for _ in _RANGE_1000:
        replace(old, new)
1286
1287
# One "A" followed by 128 * 1024 "Z" characters, prebuilt once per string
# type so the benchmarks do not pay for the conversion.
big_s = "A" + ("Z"*128*1024)
big_s_bytes = bytes_from_str(big_s)
big_s_unicode = unicode_from_str(big_s)


def _get_big_s(STR):
    """Return the prebuilt big string for the requested string type.

    STR must be the module's BYTES or UNICODE converter (compared by
    identity); anything else raises AssertionError.
    """
    if STR is BYTES:
        return big_s_bytes
    if STR is UNICODE:
        return big_s_unicode
    raise AssertionError
1295
# The older replace implementation counted all matches in
# the string even when it only needed to make one replacement.
@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
       'quick replace single character match', 10)
def quick_replace_single_match(STR):
    """Replace the leading "A" of the big string with "BB", 10 times.

    The replacement count is limited to 1.
    """
    old = STR("A")
    new = STR("BB")
    # Bind the method once so the loop body is only the call itself.
    replace = _get_big_s(STR).replace
    for _ in _RANGE_10:
        replace(old, new, 1)
1307
@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
       'quick replace multiple character match', 10)
def quick_replace_multiple_match(STR):
    """Replace the leading "AZZ" of the big string with "BBZZ", 10 times.

    The replacement count is limited to 1.
    """
    old = STR("AZZ")
    new = STR("BBZZ")
    # Bind the method once so the loop body is only the call itself.
    replace = _get_big_s(STR).replace
    for _ in _RANGE_10:
        replace(old, new, 1)
1317
1318
1319####
1320
# CCP does a lot of this, for internationalisation of ingame messages.
# The template and its substitution dict are prebuilt once per string type.
_format = "The %(thing)s is %(place)s the %(location)s."
_format_dict = {
    "thing": "THING",
    "place": "PLACE",
    "location": "LOCATION",
}
_format_bytes = bytes_from_str(_format)
_format_unicode = unicode_from_str(_format)
_format_dict_bytes = dict(
    (bytes_from_str(key), bytes_from_str(value))
    for (key, value) in _format_dict.items())
_format_dict_unicode = dict(
    (unicode_from_str(key), unicode_from_str(value))
    for (key, value) in _format_dict.items())
1328
def _get_format(STR):
    """Return the prebuilt format template for the requested string type.

    Raises UnsupportedType for BYTES when running on Python 3 or later,
    and AssertionError when STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_bytes
1337
def _get_format_dict(STR):
    """Return the prebuilt substitution dict for the requested string type.

    Raises UnsupportedType for BYTES when running on Python 3 or later,
    and AssertionError when STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_dict_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_dict_bytes
1346
# Formatting.
@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
       'formatting a string type with a dict', 1000)
def format_with_dict(STR):
    """Apply %-formatting with a mapping to the shared template, 1000 times.

    UnsupportedType from the lookup helpers propagates to the caller.
    """
    template = _get_format(STR)
    values = _get_format_dict(STR)
    for _ in _RANGE_1000:
        template % values
1355
1356
1357#### Upper- and lower- case conversion
1358
@bench('("Where in the world is Carmen San Deigo?"*10).lower()',
       "case conversion -- rare", 1000)
def lower_conversion_rare(STR):
    """Call lower() on mostly-lowercase text, 1000 times."""
    text = STR("Where in the world is Carmen San Deigo?"*10)
    # Bind the method once so the loop body is only the call itself.
    lower = text.lower
    for _ in _RANGE_1000:
        lower()
1366
@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
       "case conversion -- dense", 1000)
def lower_conversion_dense(STR):
    """Call lower() on all-uppercase text, 1000 times."""
    text = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
    # Bind the method once so the loop body is only the call itself.
    lower = text.lower
    for _ in _RANGE_1000:
        lower()
1374
1375
@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
       "case conversion -- rare", 1000)
def upper_conversion_rare(STR):
    """Call upper() on mostly-uppercase text, 1000 times."""
    # Only a few characters are lowercase, which is what makes conversion
    # "rare" here.  The subject used to be the mostly-lowercase
    # "Where in the world ..." string, which did not match the label and
    # made this the dense case for upper().
    s = STR("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10)
    # Bind the method once so the loop body is only the call itself.
    s_upper = s.upper
    for x in _RANGE_1000:
        s_upper()
1383
@bench('("where in the world is carmen san deigo?"*10).upper()',
       "case conversion -- dense", 1000)
def upper_conversion_dense(STR):
    """Call upper() on all-lowercase text, 1000 times."""
    text = STR("where in the world is carmen san deigo?"*10)
    # Bind the method once so the loop body is only the call itself.
    upper = text.upper
    for _ in _RANGE_1000:
        upper()
1391
1392
1393# end of benchmarks
1394
1395#################
1396
class BenchTimer(timeit.Timer):
    """A timeit.Timer that chooses its own iteration count."""

    def best(self, repeat=1):
        """Return the fastest observed time per execution, in seconds.

        The iteration count starts at 10 and grows tenfold until one
        timing run takes longer than 0.02 seconds or the count reaches
        10**9.  That calibration run counts as the first sample;
        ``repeat - 1`` further runs are made with the same count, and the
        minimum sample divided by the count is returned.
        """
        exponent = 1
        while True:
            number = 10 ** exponent
            first = self.timeit(number)
            if first > 0.02 or exponent == 9:
                break
            exponent += 1
        samples = [first]
        samples.extend(self.timeit(number) for _ in range(repeat - 1))
        return min(samples) / number
1408
def main():
    """Run the selected benchmarks and print a table of timings.

    Positional command-line arguments select benchmarks: one is run when
    any argument is a substring of its group name, and all are run when no
    arguments are given.  Each benchmark is timed for the bytes and
    unicode string types unless --8-bit or --unicode restricts it to one.
    One row is printed per benchmark, grouped by group name, followed by a
    TOTAL row.

    Raises SystemExit when both --8-bit and --unicode are given.
    """
    (options, test_names) = parser.parse_args()
    if options.bytes_only and options.unicode_only:
        raise SystemExit("Only one of --8-bit and --unicode are allowed")

    def wanted(func):
        # True when the command line selects this benchmark function.
        if test_names and not any(name in func.group for name in test_names):
            return False
        if options.skip_re and hasattr(func, "uses_re"):
            return False
        return True

    def measure(stmt, enabled):
        # Return (seconds, display text) for one benchmark statement.
        # A disabled or unsupported variant contributes 0.0 seconds.
        if not enabled:
            return 0.0, " - "
        try:
            elapsed = BenchTimer(stmt, "import __main__").best(REPEAT)
        except UnsupportedType:
            return 0.0, "N/A"
        return elapsed, "%.2f" % (1000 * elapsed)

    # Sorting by (group, name) makes each group contiguous for groupby().
    bench_functions = sorted(
        (obj.group, name, obj)
        for (name, obj) in globals().items()
        if hasattr(obj, "is_bench") and wanted(obj))

    p("bytes\tunicode")
    p("(in ms)\t(in ms)\t%\tcomment")

    bytes_total = uni_total = 0.0

    by_group = itertools.groupby(bench_functions, operator.itemgetter(0))
    for title, group in by_group:
        # Flush buffer before each group
        sys.stdout.flush()
        p("="*10, title)
        for (_, k, v) in group:
            bytes_time, bytes_time_s = measure(
                "__main__.%s(__main__.BYTES)" % (k,),
                not options.unicode_only)
            bytes_total += bytes_time
            uni_time, uni_time_s = measure(
                "__main__.%s(__main__.UNICODE)" % (k,),
                not options.bytes_only)
            uni_total += uni_time
            try:
                average = bytes_time/uni_time
            except (TypeError, ZeroDivisionError):
                average = 0.0
            p("%s\t%s\t%.1f\t%s (*%d)" % (
                bytes_time_s, uni_time_s, 100.*average,
                v.comment, v.repeat_count))

    if bytes_total == 0.0 and uni_total == 0.0:
        p("That was zippy!")
        return

    try:
        ratio = bytes_total/uni_total
    except ZeroDivisionError:
        ratio = 0.0
    p("%.2f\t%.2f\t%.1f\t%s" % (
        1000*bytes_total, 1000*uni_total, 100.*ratio,
        "TOTAL"))


# Script entry point: run the benchmarks when executed directly.
if __name__ == "__main__":
    main()