Blame - Tools/stringbench/stringbench.py - platform/external/python/cpython3

blob: a0a21fa0b2e1657f268aa460d2d470c67ea57e2a [file] [log] [blame]

Antoine Pitrou	1584ae3	2012-04-09 17:03:32 +0200	[diff] [blame]	1
				2	# Various microbenchmarks comparing unicode and byte string performance
				3	# Please keep this file both 2.x and 3.x compatible!
				4
				5	import timeit
				6	import itertools
				7	import operator
				8	import re
				9	import sys
				10	import datetime
				11	import optparse
				12
				13	VERSION = '2.0'
				14
				15	def p(*args):
				16	sys.stdout.write(' '.join(str(s) for s in args) + '\n')
				17
				18	if sys.version_info >= (3,):
				19	BYTES = bytes_from_str = lambda x: x.encode('ascii')
				20	UNICODE = unicode_from_str = lambda x: x
				21	else:
				22	BYTES = bytes_from_str = lambda x: x
				23	UNICODE = unicode_from_str = lambda x: x.decode('ascii')
				24
				25	class UnsupportedType(TypeError):
				26	pass
				27
				28
				29	p('stringbench v%s' % VERSION)
				30	p(sys.version)
				31	p(datetime.datetime.now())
				32
				33	REPEAT = 1
				34	REPEAT = 3
				35	#REPEAT = 7
				36
				37	if __name__ != "__main__":
				38	raise SystemExit("Must run as main program")
				39
				40	parser = optparse.OptionParser()
				41	parser.add_option("-R", "--skip-re", dest="skip_re",
				42	action="store_true",
				43	help="skip regular expression tests")
				44	parser.add_option("-8", "--8-bit", dest="bytes_only",
				45	action="store_true",
				46	help="only do 8-bit string benchmarks")
				47	parser.add_option("-u", "--unicode", dest="unicode_only",
				48	action="store_true",
				49	help="only do Unicode string benchmarks")
				50
				51
				52	_RANGE_1000 = list(range(1000))
				53	_RANGE_100 = list(range(100))
				54	_RANGE_10 = list(range(10))
				55
				56	dups = {}
				57	def bench(s, group, repeat_count):
				58	def blah(f):
				59	if f.__name__ in dups:
				60	raise AssertionError("Multiple functions with same name: %r" %
				61	(f.__name__,))
				62	dups[f.__name__] = 1
				63	f.comment = s
				64	f.is_bench = True
				65	f.group = group
				66	f.repeat_count = repeat_count
				67	return f
				68	return blah
				69
				70	def uses_re(f):
				71	f.uses_re = True
				72
				73	####### 'in' comparisons
				74
				75	@bench('"A" in "A"*1000', "early match, single character", 1000)
				76	def in_test_quick_match_single_character(STR):
				77	s1 = STR("A" * 1000)
				78	s2 = STR("A")
				79	for x in _RANGE_1000:
				80	s2 in s1
				81
				82	@bench('"B" in "A"*1000', "no match, single character", 1000)
				83	def in_test_no_match_single_character(STR):
				84	s1 = STR("A" * 1000)
				85	s2 = STR("B")
				86	for x in _RANGE_1000:
				87	s2 in s1
				88
				89
				90	@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
				91	def in_test_quick_match_two_characters(STR):
				92	s1 = STR("AB" * 1000)
				93	s2 = STR("AB")
				94	for x in _RANGE_1000:
				95	s2 in s1
				96
				97	@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
				98	def in_test_no_match_two_character(STR):
				99	s1 = STR("AB" * 1000)
				100	s2 = STR("BC")
				101	for x in _RANGE_1000:
				102	s2 in s1
				103
				104	@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
				105	def in_test_slow_match_two_characters(STR):
				106	s1 = STR("AB" * 300+"C")
				107	s2 = STR("BC")
				108	for x in _RANGE_1000:
				109	s2 in s1
				110
				111	@bench('s="ABC"33; (s+"E") in ((s+"D")300+s+"E")',
				112	"late match, 100 characters", 100)
				113	def in_test_slow_match_100_characters(STR):
				114	m = STR("ABC"*33)
				115	d = STR("D")
				116	e = STR("E")
				117	s1 = (m+d)*300 + m+e
				118	s2 = m+e
				119	for x in _RANGE_100:
				120	s2 in s1
				121
				122	# Try with regex
				123	@uses_re
				124	@bench('s="ABC"33; re.compile(s+"D").search((s+"D")300+s+"E")',
				125	"late match, 100 characters", 100)
				126	def re_test_slow_match_100_characters(STR):
				127	m = STR("ABC"*33)
				128	d = STR("D")
				129	e = STR("E")
				130	s1 = (m+d)*300 + m+e
				131	s2 = m+e
				132	pat = re.compile(s2)
				133	search = pat.search
				134	for x in _RANGE_100:
				135	search(s1)
				136
				137
				138	#### same tests as 'in' but use 'find'
				139
				140	@bench('("A"*1000).find("A")', "early match, single character", 1000)
				141	def find_test_quick_match_single_character(STR):
				142	s1 = STR("A" * 1000)
				143	s2 = STR("A")
				144	s1_find = s1.find
				145	for x in _RANGE_1000:
				146	s1_find(s2)
				147
				148	@bench('("A"*1000).find("B")', "no match, single character", 1000)
				149	def find_test_no_match_single_character(STR):
				150	s1 = STR("A" * 1000)
				151	s2 = STR("B")
				152	s1_find = s1.find
				153	for x in _RANGE_1000:
				154	s1_find(s2)
				155
				156
				157	@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
				158	def find_test_quick_match_two_characters(STR):
				159	s1 = STR("AB" * 1000)
				160	s2 = STR("AB")
				161	s1_find = s1.find
				162	for x in _RANGE_1000:
				163	s1_find(s2)
				164
				165	@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
				166	def find_test_no_match_two_character(STR):
				167	s1 = STR("AB" * 1000)
				168	s2 = STR("BC")
				169	s1_find = s1.find
				170	for x in _RANGE_1000:
				171	s1_find(s2)
				172
				173	@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
				174	def find_test_no_match_two_character_bis(STR):
				175	s1 = STR("AB" * 1000)
				176	s2 = STR("CA")
				177	s1_find = s1.find
				178	for x in _RANGE_1000:
				179	s1_find(s2)
				180
				181	@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
				182	def find_test_slow_match_two_characters(STR):
				183	s1 = STR("AB" * 300+"C")
				184	s2 = STR("BC")
				185	s1_find = s1.find
				186	for x in _RANGE_1000:
				187	s1_find(s2)
				188
				189	@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
				190	def find_test_slow_match_two_characters_bis(STR):
				191	s1 = STR("AB" * 300+"CA")
				192	s2 = STR("CA")
				193	s1_find = s1.find
				194	for x in _RANGE_1000:
				195	s1_find(s2)
				196
				197	@bench('s="ABC"33; ((s+"D")500+s+"E").find(s+"E")',
				198	"late match, 100 characters", 100)
				199	def find_test_slow_match_100_characters(STR):
				200	m = STR("ABC"*33)
				201	d = STR("D")
				202	e = STR("E")
				203	s1 = (m+d)*500 + m+e
				204	s2 = m+e
				205	s1_find = s1.find
				206	for x in _RANGE_100:
				207	s1_find(s2)
				208
				209	@bench('s="ABC"33; ((s+"D")500+"E"+s).find("E"+s)',
				210	"late match, 100 characters", 100)
				211	def find_test_slow_match_100_characters_bis(STR):
				212	m = STR("ABC"*33)
				213	d = STR("D")
				214	e = STR("E")
				215	s1 = (m+d)*500 + e+m
				216	s2 = e+m
				217	s1_find = s1.find
				218	for x in _RANGE_100:
				219	s1_find(s2)
				220
				221
				222	#### Same tests for 'rfind'
				223
				224	@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
				225	def rfind_test_quick_match_single_character(STR):
				226	s1 = STR("A" * 1000)
				227	s2 = STR("A")
				228	s1_rfind = s1.rfind
				229	for x in _RANGE_1000:
				230	s1_rfind(s2)
				231
				232	@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
				233	def rfind_test_no_match_single_character(STR):
				234	s1 = STR("A" * 1000)
				235	s2 = STR("B")
				236	s1_rfind = s1.rfind
				237	for x in _RANGE_1000:
				238	s1_rfind(s2)
				239
				240
				241	@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
				242	def rfind_test_quick_match_two_characters(STR):
				243	s1 = STR("AB" * 1000)
				244	s2 = STR("AB")
				245	s1_rfind = s1.rfind
				246	for x in _RANGE_1000:
				247	s1_rfind(s2)
				248
				249	@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
				250	def rfind_test_no_match_two_character(STR):
				251	s1 = STR("AB" * 1000)
				252	s2 = STR("BC")
				253	s1_rfind = s1.rfind
				254	for x in _RANGE_1000:
				255	s1_rfind(s2)
				256
				257	@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
				258	def rfind_test_no_match_two_character_bis(STR):
				259	s1 = STR("AB" * 1000)
				260	s2 = STR("CA")
				261	s1_rfind = s1.rfind
				262	for x in _RANGE_1000:
				263	s1_rfind(s2)
				264
				265	@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
				266	def rfind_test_slow_match_two_characters(STR):
				267	s1 = STR("C" + "AB" * 300)
				268	s2 = STR("CA")
				269	s1_rfind = s1.rfind
				270	for x in _RANGE_1000:
				271	s1_rfind(s2)
				272
				273	@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
				274	def rfind_test_slow_match_two_characters_bis(STR):
				275	s1 = STR("BC" + "AB" * 300)
				276	s2 = STR("BC")
				277	s1_rfind = s1.rfind
				278	for x in _RANGE_1000:
				279	s1_rfind(s2)
				280
				281	@bench('s="ABC"33; ("E"+s+("D"+s)500).rfind("E"+s)',
				282	"late match, 100 characters", 100)
				283	def rfind_test_slow_match_100_characters(STR):
				284	m = STR("ABC"*33)
				285	d = STR("D")
				286	e = STR("E")
				287	s1 = e+m + (d+m)*500
				288	s2 = e+m
				289	s1_rfind = s1.rfind
				290	for x in _RANGE_100:
				291	s1_rfind(s2)
				292
				293	@bench('s="ABC"33; (s+"E"+("D"+s)500).rfind(s+"E")',
				294	"late match, 100 characters", 100)
				295	def rfind_test_slow_match_100_characters_bis(STR):
				296	m = STR("ABC"*33)
				297	d = STR("D")
				298	e = STR("E")
				299	s1 = m+e + (d+m)*500
				300	s2 = m+e
				301	s1_rfind = s1.rfind
				302	for x in _RANGE_100:
				303	s1_rfind(s2)
				304
				305
				306	#### Now with index.
				307	# Skip the ones which fail because that would include exception overhead.
				308
				309	@bench('("A"*1000).index("A")', "early match, single character", 1000)
				310	def index_test_quick_match_single_character(STR):
				311	s1 = STR("A" * 1000)
				312	s2 = STR("A")
				313	s1_index = s1.index
				314	for x in _RANGE_1000:
				315	s1_index(s2)
				316
				317	@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
				318	def index_test_quick_match_two_characters(STR):
				319	s1 = STR("AB" * 1000)
				320	s2 = STR("AB")
				321	s1_index = s1.index
				322	for x in _RANGE_1000:
				323	s1_index(s2)
				324
				325	@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
				326	def index_test_slow_match_two_characters(STR):
				327	s1 = STR("AB" * 300+"C")
				328	s2 = STR("BC")
				329	s1_index = s1.index
				330	for x in _RANGE_1000:
				331	s1_index(s2)
				332
				333	@bench('s="ABC"33; ((s+"D")500+s+"E").index(s+"E")',
				334	"late match, 100 characters", 100)
				335	def index_test_slow_match_100_characters(STR):
				336	m = STR("ABC"*33)
				337	d = STR("D")
				338	e = STR("E")
				339	s1 = (m+d)*500 + m+e
				340	s2 = m+e
				341	s1_index = s1.index
				342	for x in _RANGE_100:
				343	s1_index(s2)
				344
				345
				346	#### Same for rindex
				347
				348	@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
				349	def rindex_test_quick_match_single_character(STR):
				350	s1 = STR("A" * 1000)
				351	s2 = STR("A")
				352	s1_rindex = s1.rindex
				353	for x in _RANGE_1000:
				354	s1_rindex(s2)
				355
				356	@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
				357	def rindex_test_quick_match_two_characters(STR):
				358	s1 = STR("AB" * 1000)
				359	s2 = STR("AB")
				360	s1_rindex = s1.rindex
				361	for x in _RANGE_1000:
				362	s1_rindex(s2)
				363
				364	@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
				365	def rindex_test_slow_match_two_characters(STR):
				366	s1 = STR("C" + "AB" * 300)
				367	s2 = STR("CA")
				368	s1_rindex = s1.rindex
				369	for x in _RANGE_1000:
				370	s1_rindex(s2)
				371
				372	@bench('s="ABC"33; ("E"+s+("D"+s)500).rindex("E"+s)',
				373	"late match, 100 characters", 100)
				374	def rindex_test_slow_match_100_characters(STR):
				375	m = STR("ABC"*33)
				376	d = STR("D")
				377	e = STR("E")
				378	s1 = e + m + (d+m)*500
				379	s2 = e + m
				380	s1_rindex = s1.rindex
				381	for x in _RANGE_100:
				382	s1_rindex(s2)
				383
				384
				385	#### Same for partition
				386
				387	@bench('("A"*1000).partition("A")', "early match, single character", 1000)
				388	def partition_test_quick_match_single_character(STR):
				389	s1 = STR("A" * 1000)
				390	s2 = STR("A")
				391	s1_partition = s1.partition
				392	for x in _RANGE_1000:
				393	s1_partition(s2)
				394
				395	@bench('("A"*1000).partition("B")', "no match, single character", 1000)
				396	def partition_test_no_match_single_character(STR):
				397	s1 = STR("A" * 1000)
				398	s2 = STR("B")
				399	s1_partition = s1.partition
				400	for x in _RANGE_1000:
				401	s1_partition(s2)
				402
				403
				404	@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
				405	def partition_test_quick_match_two_characters(STR):
				406	s1 = STR("AB" * 1000)
				407	s2 = STR("AB")
				408	s1_partition = s1.partition
				409	for x in _RANGE_1000:
				410	s1_partition(s2)
				411
				412	@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
				413	def partition_test_no_match_two_character(STR):
				414	s1 = STR("AB" * 1000)
				415	s2 = STR("BC")
				416	s1_partition = s1.partition
				417	for x in _RANGE_1000:
				418	s1_partition(s2)
				419
				420	@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
				421	def partition_test_slow_match_two_characters(STR):
				422	s1 = STR("AB" * 300+"C")
				423	s2 = STR("BC")
				424	s1_partition = s1.partition
				425	for x in _RANGE_1000:
				426	s1_partition(s2)
				427
				428	@bench('s="ABC"33; ((s+"D")500+s+"E").partition(s+"E")',
				429	"late match, 100 characters", 100)
				430	def partition_test_slow_match_100_characters(STR):
				431	m = STR("ABC"*33)
				432	d = STR("D")
				433	e = STR("E")
				434	s1 = (m+d)*500 + m+e
				435	s2 = m+e
				436	s1_partition = s1.partition
				437	for x in _RANGE_100:
				438	s1_partition(s2)
				439
				440
				441	#### Same for rpartition
				442
				443	@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
				444	def rpartition_test_quick_match_single_character(STR):
				445	s1 = STR("A" * 1000)
				446	s2 = STR("A")
				447	s1_rpartition = s1.rpartition
				448	for x in _RANGE_1000:
				449	s1_rpartition(s2)
				450
				451	@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
				452	def rpartition_test_no_match_single_character(STR):
				453	s1 = STR("A" * 1000)
				454	s2 = STR("B")
				455	s1_rpartition = s1.rpartition
				456	for x in _RANGE_1000:
				457	s1_rpartition(s2)
				458
				459
				460	@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
				461	def rpartition_test_quick_match_two_characters(STR):
				462	s1 = STR("AB" * 1000)
				463	s2 = STR("AB")
				464	s1_rpartition = s1.rpartition
				465	for x in _RANGE_1000:
				466	s1_rpartition(s2)
				467
				468	@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
				469	def rpartition_test_no_match_two_character(STR):
				470	s1 = STR("AB" * 1000)
				471	s2 = STR("BC")
				472	s1_rpartition = s1.rpartition
				473	for x in _RANGE_1000:
				474	s1_rpartition(s2)
				475
				476	@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
				477	def rpartition_test_slow_match_two_characters(STR):
				478	s1 = STR("C" + "AB" * 300)
				479	s2 = STR("CA")
				480	s1_rpartition = s1.rpartition
				481	for x in _RANGE_1000:
				482	s1_rpartition(s2)
				483
				484	@bench('s="ABC"33; ("E"+s+("D"+s)500).rpartition("E"+s)',
				485	"late match, 100 characters", 100)
				486	def rpartition_test_slow_match_100_characters(STR):
				487	m = STR("ABC"*33)
				488	d = STR("D")
				489	e = STR("E")
				490	s1 = e + m + (d+m)*500
				491	s2 = e + m
				492	s1_rpartition = s1.rpartition
				493	for x in _RANGE_100:
				494	s1_rpartition(s2)
				495
				496
				497	#### Same for split(s, 1)
				498
				499	@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
				500	def split_test_quick_match_single_character(STR):
				501	s1 = STR("A" * 1000)
				502	s2 = STR("A")
				503	s1_split = s1.split
				504	for x in _RANGE_1000:
				505	s1_split(s2, 1)
				506
				507	@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
				508	def split_test_no_match_single_character(STR):
				509	s1 = STR("A" * 1000)
				510	s2 = STR("B")
				511	s1_split = s1.split
				512	for x in _RANGE_1000:
				513	s1_split(s2, 1)
				514
				515
				516	@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
				517	def split_test_quick_match_two_characters(STR):
				518	s1 = STR("AB" * 1000)
				519	s2 = STR("AB")
				520	s1_split = s1.split
				521	for x in _RANGE_1000:
				522	s1_split(s2, 1)
				523
				524	@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
				525	def split_test_no_match_two_character(STR):
				526	s1 = STR("AB" * 1000)
				527	s2 = STR("BC")
				528	s1_split = s1.split
				529	for x in _RANGE_1000:
				530	s1_split(s2, 1)
				531
				532	@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
				533	def split_test_slow_match_two_characters(STR):
				534	s1 = STR("AB" * 300+"C")
				535	s2 = STR("BC")
				536	s1_split = s1.split
				537	for x in _RANGE_1000:
				538	s1_split(s2, 1)
				539
				540	@bench('s="ABC"33; ((s+"D")500+s+"E").split(s+"E", 1)',
				541	"late match, 100 characters", 100)
				542	def split_test_slow_match_100_characters(STR):
				543	m = STR("ABC"*33)
				544	d = STR("D")
				545	e = STR("E")
				546	s1 = (m+d)*500 + m+e
				547	s2 = m+e
				548	s1_split = s1.split
				549	for x in _RANGE_100:
				550	s1_split(s2, 1)
				551
				552
				553	#### Same for rsplit(s, 1)
				554
				555	@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
				556	def rsplit_test_quick_match_single_character(STR):
				557	s1 = STR("A" * 1000)
				558	s2 = STR("A")
				559	s1_rsplit = s1.rsplit
				560	for x in _RANGE_1000:
				561	s1_rsplit(s2, 1)
				562
				563	@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
				564	def rsplit_test_no_match_single_character(STR):
				565	s1 = STR("A" * 1000)
				566	s2 = STR("B")
				567	s1_rsplit = s1.rsplit
				568	for x in _RANGE_1000:
				569	s1_rsplit(s2, 1)
				570
				571
				572	@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
				573	def rsplit_test_quick_match_two_characters(STR):
				574	s1 = STR("AB" * 1000)
				575	s2 = STR("AB")
				576	s1_rsplit = s1.rsplit
				577	for x in _RANGE_1000:
				578	s1_rsplit(s2, 1)
				579
				580	@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
				581	def rsplit_test_no_match_two_character(STR):
				582	s1 = STR("AB" * 1000)
				583	s2 = STR("BC")
				584	s1_rsplit = s1.rsplit
				585	for x in _RANGE_1000:
				586	s1_rsplit(s2, 1)
				587
				588	@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
				589	def rsplit_test_slow_match_two_characters(STR):
				590	s1 = STR("C" + "AB" * 300)
				591	s2 = STR("CA")
				592	s1_rsplit = s1.rsplit
				593	for x in _RANGE_1000:
				594	s1_rsplit(s2, 1)
				595
				596	@bench('s="ABC"33; ("E"+s+("D"+s)500).rsplit("E"+s, 1)',
				597	"late match, 100 characters", 100)
				598	def rsplit_test_slow_match_100_characters(STR):
				599	m = STR("ABC"*33)
				600	d = STR("D")
				601	e = STR("E")
				602	s1 = e + m + (d+m)*500
				603	s2 = e + m
				604	s1_rsplit = s1.rsplit
				605	for x in _RANGE_100:
				606	s1_rsplit(s2, 1)
				607
				608
				609	#### Benchmark the operator-based methods
				610
				611	@bench('"A"*10', "repeat 1 character 10 times", 1000)
				612	def repeat_single_10_times(STR):
				613	s = STR("A")
				614	for x in _RANGE_1000:
				615	s * 10
				616
				617	@bench('"A"*1000', "repeat 1 character 1000 times", 1000)
				618	def repeat_single_1000_times(STR):
				619	s = STR("A")
				620	for x in _RANGE_1000:
				621	s * 1000
				622
				623	@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
				624	def repeat_5_10_times(STR):
				625	s = STR("ABCDE")
				626	for x in _RANGE_1000:
				627	s * 10
				628
				629	@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
				630	def repeat_5_1000_times(STR):
				631	s = STR("ABCDE")
				632	for x in _RANGE_1000:
				633	s * 1000
				634
				635	# + for concat
				636
				637	@bench('"Andrew"+"Dalke"', "concat two strings", 1000)
				638	def concat_two_strings(STR):
				639	s1 = STR("Andrew")
				640	s2 = STR("Dalke")
				641	for x in _RANGE_1000:
				642	s1+s2
				643
				644	@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
				645	1000)
				646	def concat_many_strings(STR):
				647	s1=STR('TIXSGYNREDCVBHJ')
				648	s2=STR('PUMTLXBZVDO')
				649	s3=STR('FVZNJ')
				650	s4=STR('OGDXUW')
				651	s5=STR('WEIMRNCOYVGHKB')
				652	s6=STR('FCQTNMXPUZH')
				653	s7=STR('TICZJYRLBNVUEAK')
				654	s8=STR('REYB')
				655	s9=STR('PWUOQ')
				656	s10=STR('EQHCMKBS')
				657	s11=STR('AEVDFOH')
				658	s12=STR('IFHVD')
				659	s13=STR('JGTCNLXWOHQ')
				660	s14=STR('ITSKEPYLROZAWXF')
				661	s15=STR('THEK')
				662	s16=STR('GHPZFBUYCKMNJIT')
				663	s17=STR('JMUZ')
				664	s18=STR('WLZQMTB')
				665	s19=STR('KPADCBW')
				666	s20=STR('TNJHZQAGBU')
				667	for x in _RANGE_1000:
				668	(s1 + s2+ s3+ s4+ s5+ s6+ s7+ s8+ s9+s10+
				669	s11+s12+s13+s14+s15+s16+s17+s18+s19+s20)
				670
				671
				672	#### Benchmark join
				673
				674	def get_bytes_yielding_seq(STR, arg):
				675	if STR is BYTES and sys.version_info >= (3,):
				676	raise UnsupportedType
				677	return STR(arg)
				678
				679	@bench('"A".join("")',
				680	"join empty string, with 1 character sep", 100)
				681	def join_empty_single(STR):
				682	sep = STR("A")
				683	s2 = get_bytes_yielding_seq(STR, "")
				684	sep_join = sep.join
				685	for x in _RANGE_100:
				686	sep_join(s2)
				687
				688	@bench('"ABCDE".join("")',
				689	"join empty string, with 5 character sep", 100)
				690	def join_empty_5(STR):
				691	sep = STR("ABCDE")
				692	s2 = get_bytes_yielding_seq(STR, "")
				693	sep_join = sep.join
				694	for x in _RANGE_100:
				695	sep_join(s2)
				696
				697	@bench('"A".join("ABC..Z")',
				698	"join string with 26 characters, with 1 character sep", 1000)
				699	def join_alphabet_single(STR):
				700	sep = STR("A")
				701	s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
				702	sep_join = sep.join
				703	for x in _RANGE_1000:
				704	sep_join(s2)
				705
				706	@bench('"ABCDE".join("ABC..Z")',
				707	"join string with 26 characters, with 5 character sep", 1000)
				708	def join_alphabet_5(STR):
				709	sep = STR("ABCDE")
				710	s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
				711	sep_join = sep.join
				712	for x in _RANGE_1000:
				713	sep_join(s2)
				714
				715	@bench('"A".join(list("ABC..Z"))',
				716	"join list of 26 characters, with 1 character sep", 1000)
				717	def join_alphabet_list_single(STR):
				718	sep = STR("A")
				719	s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
				720	sep_join = sep.join
				721	for x in _RANGE_1000:
				722	sep_join(s2)
				723
				724	@bench('"ABCDE".join(list("ABC..Z"))',
				725	"join list of 26 characters, with 5 character sep", 1000)
				726	def join_alphabet_list_five(STR):
				727	sep = STR("ABCDE")
				728	s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
				729	sep_join = sep.join
				730	for x in _RANGE_1000:
				731	sep_join(s2)
				732
				733	@bench('"A".join(["Bob"]*100))',
				734	"join list of 100 words, with 1 character sep", 1000)
				735	def join_100_words_single(STR):
				736	sep = STR("A")
				737	s2 = [STR("Bob")]*100
				738	sep_join = sep.join
				739	for x in _RANGE_1000:
				740	sep_join(s2)
				741
				742	@bench('"ABCDE".join(["Bob"]*100))',
				743	"join list of 100 words, with 5 character sep", 1000)
				744	def join_100_words_5(STR):
				745	sep = STR("ABCDE")
				746	s2 = [STR("Bob")]*100
				747	sep_join = sep.join
				748	for x in _RANGE_1000:
				749	sep_join(s2)
				750
				751	#### split tests
				752
				753	@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
				754	def whitespace_split(STR):
				755	s = STR("Here are some words. "*2)
				756	s_split = s.split
				757	for x in _RANGE_1000:
				758	s_split()
				759
				760	@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
				761	def whitespace_rsplit(STR):
				762	s = STR("Here are some words. "*2)
				763	s_rsplit = s.rsplit
				764	for x in _RANGE_1000:
				765	s_rsplit()
				766
				767	@bench('("Here are some words. "*2).split(None, 1)',
				768	"split 1 whitespace", 1000)
				769	def whitespace_split_1(STR):
				770	s = STR("Here are some words. "*2)
				771	s_split = s.split
				772	N = None
				773	for x in _RANGE_1000:
				774	s_split(N, 1)
				775
				776	@bench('("Here are some words. "*2).rsplit(None, 1)',
				777	"split 1 whitespace", 1000)
				778	def whitespace_rsplit_1(STR):
				779	s = STR("Here are some words. "*2)
				780	s_rsplit = s.rsplit
				781	N = None
				782	for x in _RANGE_1000:
				783	s_rsplit(N, 1)
				784
				785	@bench('("Here are some words. "*2).partition(" ")',
				786	"split 1 whitespace", 1000)
				787	def whitespace_partition(STR):
				788	sep = STR(" ")
				789	s = STR("Here are some words. "*2)
				790	s_partition = s.partition
				791	for x in _RANGE_1000:
				792	s_partition(sep)
				793
				794	@bench('("Here are some words. "*2).rpartition(" ")',
				795	"split 1 whitespace", 1000)
				796	def whitespace_rpartition(STR):
				797	sep = STR(" ")
				798	s = STR("Here are some words. "*2)
				799	s_rpartition = s.rpartition
				800	for x in _RANGE_1000:
				801	s_rpartition(sep)
				802
				803	human_text = """\
				804	Python is a dynamic object-oriented programming language that can be
				805	used for many kinds of software development. It offers strong support
				806	for integration with other languages and tools, comes with extensive
				807	standard libraries, and can be learned in a few days. Many Python
				808	programmers report substantial productivity gains and feel the language
				809	encourages the development of higher quality, more maintainable code.
				810
				811	Python runs on Windows, Linux/Unix, Mac OS X, OS/2, Amiga, Palm
				812	Handhelds, and Nokia mobile phones. Python has also been ported to the
				813	Java and .NET virtual machines.
				814
				815	Python is distributed under an OSI-approved open source license that
				816	makes it free to use, even for commercial products.
				817	"""*25
				818	human_text_bytes = bytes_from_str(human_text)
				819	human_text_unicode = unicode_from_str(human_text)
				820	def _get_human_text(STR):
				821	if STR is UNICODE:
				822	return human_text_unicode
				823	if STR is BYTES:
				824	return human_text_bytes
				825	raise AssertionError
				826
				827	@bench('human_text.split()', "split whitespace (huge)", 10)
				828	def whitespace_split_huge(STR):
				829	s = _get_human_text(STR)
				830	s_split = s.split
				831	for x in _RANGE_10:
				832	s_split()
				833
				834	@bench('human_text.rsplit()', "split whitespace (huge)", 10)
				835	def whitespace_rsplit_huge(STR):
				836	s = _get_human_text(STR)
				837	s_rsplit = s.rsplit
				838	for x in _RANGE_10:
				839	s_rsplit()
				840
				841
				842
				843	@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
				844	def newlines_split(STR):
				845	s = STR("this\nis\na\ntest\n")
				846	s_split = s.split
				847	nl = STR("\n")
				848	for x in _RANGE_1000:
				849	s_split(nl)
				850
				851
				852	@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
				853	def newlines_rsplit(STR):
				854	s = STR("this\nis\na\ntest\n")
				855	s_rsplit = s.rsplit
				856	nl = STR("\n")
				857	for x in _RANGE_1000:
				858	s_rsplit(nl)
				859
				860	@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
				861	def newlines_splitlines(STR):
				862	s = STR("this\nis\na\ntest\n")
				863	s_splitlines = s.splitlines
				864	for x in _RANGE_1000:
				865	s_splitlines()
				866
				867	## split text with 2000 newlines
				868
				869	def _make_2000_lines():
				870	import random
				871	r = random.Random(100)
				872	chars = list(map(chr, range(32, 128)))
				873	i = 0
				874	while i < len(chars):
				875	chars[i] = " "
				876	i += r.randrange(9)
				877	s = "".join(chars)
				878	s = s*4
				879	words = []
				880	for i in range(2000):
				881	start = r.randrange(96)
				882	n = r.randint(5, 65)
				883	words.append(s[start:start+n])
				884	return "\n".join(words)+"\n"
				885
				886	_text_with_2000_lines = _make_2000_lines()
				887	_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
				888	_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)
				889	def _get_2000_lines(STR):
				890	if STR is UNICODE:
				891	return _text_with_2000_lines_unicode
				892	if STR is BYTES:
				893	return _text_with_2000_lines_bytes
				894	raise AssertionError
				895
				896
				897	@bench('"...text...".split("\\n")', "split 2000 newlines", 10)
				898	def newlines_split_2000(STR):
				899	s = _get_2000_lines(STR)
				900	s_split = s.split
				901	nl = STR("\n")
				902	for x in _RANGE_10:
				903	s_split(nl)
				904
				905	@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
				906	def newlines_rsplit_2000(STR):
				907	s = _get_2000_lines(STR)
				908	s_rsplit = s.rsplit
				909	nl = STR("\n")
				910	for x in _RANGE_10:
				911	s_rsplit(nl)
				912
				913	@bench('"...text...".splitlines()', "split 2000 newlines", 10)
				914	def newlines_splitlines_2000(STR):
				915	s = _get_2000_lines(STR)
				916	s_splitlines = s.splitlines
				917	for x in _RANGE_10:
				918	s_splitlines()
				919
				920
				921	## split text on "--" characters
				922	@bench(
				923	'"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
				924	"split on multicharacter separator (small)", 1000)
				925	def split_multichar_sep_small(STR):
				926	s = STR("this--is--a--test--of--the--emergency--broadcast--system")
				927	s_split = s.split
				928	pat = STR("--")
				929	for x in _RANGE_1000:
				930	s_split(pat)
				931	@bench(
				932	'"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
				933	"split on multicharacter separator (small)", 1000)
				934	def rsplit_multichar_sep_small(STR):
				935	s = STR("this--is--a--test--of--the--emergency--broadcast--system")
				936	s_rsplit = s.rsplit
				937	pat = STR("--")
				938	for x in _RANGE_1000:
				939	s_rsplit(pat)
				940
				941	## split dna text on "ACTAT" characters
				942	@bench('dna.split("ACTAT")',
				943	"split on multicharacter separator (dna)", 10)
				944	def split_multichar_sep_dna(STR):
				945	s = _get_dna(STR)
				946	s_split = s.split
				947	pat = STR("ACTAT")
				948	for x in _RANGE_10:
				949	s_split(pat)
				950
				951	@bench('dna.rsplit("ACTAT")',
				952	"split on multicharacter separator (dna)", 10)
				953	def rsplit_multichar_sep_dna(STR):
				954	s = _get_dna(STR)
				955	s_rsplit = s.rsplit
				956	pat = STR("ACTAT")
				957	for x in _RANGE_10:
				958	s_rsplit(pat)
				959
				960
				961
				962	## split with limits
				963
				964	GFF3_example = "\t".join([
				965	"I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".",
				966	"ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119"])
				967
				968	@bench('GFF3_example.split("\\t")', "tab split", 1000)
				969	def tab_split_no_limit(STR):
				970	sep = STR("\t")
				971	s = STR(GFF3_example)
				972	s_split = s.split
				973	for x in _RANGE_1000:
				974	s_split(sep)
				975
				976	@bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
				977	def tab_split_limit(STR):
				978	sep = STR("\t")
				979	s = STR(GFF3_example)
				980	s_split = s.split
				981	for x in _RANGE_1000:
				982	s_split(sep, 8)
				983
				984	@bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
				985	def tab_rsplit_no_limit(STR):
				986	sep = STR("\t")
				987	s = STR(GFF3_example)
				988	s_rsplit = s.rsplit
				989	for x in _RANGE_1000:
				990	s_rsplit(sep)
				991
				992	@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
				993	def tab_rsplit_limit(STR):
				994	sep = STR("\t")
				995	s = STR(GFF3_example)
				996	s_rsplit = s.rsplit
				997	for x in _RANGE_1000:
				998	s_rsplit(sep, 8)
				999
				1000	#### Count characters
				1001
				1002	@bench('...text.with.2000.newlines.count("\\n")',
				1003	"count newlines", 10)
				1004	def count_newlines(STR):
				1005	s = _get_2000_lines(STR)
				1006	s_count = s.count
				1007	nl = STR("\n")
				1008	for x in _RANGE_10:
				1009	s_count(nl)
				1010
				1011	# Orchid sequences concatenated, from Biopython
				1012	_dna = """
				1013	CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
				1014	AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
				1015	AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
				1016	TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
				1017	AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
				1018	TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
				1019	CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
				1020	TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
				1021	GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
				1022	TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
				1023	GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
				1024	ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
				1025	CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
				1026	ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
				1027	ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
				1028	TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
				1029	CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
				1030	GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
				1031	ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
				1032	ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
				1033	ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
				1034	GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
				1035	TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
				1036	TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
				1037	TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
				1038	GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
				1039	GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
				1040	AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
				1041	GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
				1042	TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
				1043	CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
				1044	TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
				1045	TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
				1046	AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
				1047	GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
				1048	GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
				1049	CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
				1050	GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
				1051	TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
				1052	ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
				1053	GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
				1054	AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
				1055	AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
				1056	ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
				1057	GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
				1058	GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
				1059	AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
				1060	GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
				1061	ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
				1062	GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
				1063	"""
				1064	_dna = "".join(_dna.splitlines())
				1065	_dna = _dna * 25
				1066	_dna_bytes = bytes_from_str(_dna)
				1067	_dna_unicode = unicode_from_str(_dna)
				1068
				1069	def _get_dna(STR):
				1070	if STR is UNICODE:
				1071	return _dna_unicode
				1072	if STR is BYTES:
				1073	return _dna_bytes
				1074	raise AssertionError
				1075
				1076	@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
				1077	def count_aact(STR):
				1078	seq = _get_dna(STR)
				1079	seq_count = seq.count
				1080	needle = STR("AACT")
				1081	for x in _RANGE_10:
				1082	seq_count(needle)
				1083
				1084	##### startswith and endswith
				1085
				1086	@bench('"Andrew".startswith("A")', 'startswith single character', 1000)
				1087	def startswith_single(STR):
				1088	s1 = STR("Andrew")
				1089	s2 = STR("A")
				1090	s1_startswith = s1.startswith
				1091	for x in _RANGE_1000:
				1092	s1_startswith(s2)
				1093
				1094	@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
				1095	1000)
				1096	def startswith_multiple(STR):
				1097	s1 = STR("Andrew")
				1098	s2 = STR("Andrew")
				1099	s1_startswith = s1.startswith
				1100	for x in _RANGE_1000:
				1101	s1_startswith(s2)
				1102
				1103	@bench('"Andrew".startswith("Anders")',
				1104	'startswith multiple characters - not!', 1000)
				1105	def startswith_multiple_not(STR):
				1106	s1 = STR("Andrew")
				1107	s2 = STR("Anders")
				1108	s1_startswith = s1.startswith
				1109	for x in _RANGE_1000:
				1110	s1_startswith(s2)
				1111
				1112
				1113	# endswith
				1114
				1115	@bench('"Andrew".endswith("w")', 'endswith single character', 1000)
				1116	def endswith_single(STR):
				1117	s1 = STR("Andrew")
				1118	s2 = STR("w")
				1119	s1_endswith = s1.endswith
				1120	for x in _RANGE_1000:
				1121	s1_endswith(s2)
				1122
				1123	@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
				1124	def endswith_multiple(STR):
				1125	s1 = STR("Andrew")
				1126	s2 = STR("Andrew")
				1127	s1_endswith = s1.endswith
				1128	for x in _RANGE_1000:
				1129	s1_endswith(s2)
				1130
				1131	@bench('"Andrew".endswith("Anders")',
				1132	'endswith multiple characters - not!', 1000)
				1133	def endswith_multiple_not(STR):
				1134	s1 = STR("Andrew")
				1135	s2 = STR("Anders")
				1136	s1_endswith = s1.endswith
				1137	for x in _RANGE_1000:
				1138	s1_endswith(s2)
				1139
				1140	#### Strip
				1141
				1142	@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
				1143	def terminal_newline_strip_right(STR):
				1144	s = STR("Hello!\n")
				1145	s_strip = s.strip
				1146	for x in _RANGE_1000:
				1147	s_strip()
				1148
				1149	@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
				1150	def terminal_newline_rstrip(STR):
				1151	s = STR("Hello!\n")
				1152	s_rstrip = s.rstrip
				1153	for x in _RANGE_1000:
				1154	s_rstrip()
				1155
				1156	@bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
				1157	def terminal_newline_strip_left(STR):
				1158	s = STR("\nHello!")
				1159	s_strip = s.strip
				1160	for x in _RANGE_1000:
				1161	s_strip()
				1162
				1163	@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
				1164	def terminal_newline_strip_both(STR):
				1165	s = STR("\nHello!\n")
				1166	s_strip = s.strip
				1167	for x in _RANGE_1000:
				1168	s_strip()
				1169
				1170	@bench('"\\nHello!".rstrip()', 'strip terminal newline', 1000)
				1171	def terminal_newline_lstrip(STR):
				1172	s = STR("\nHello!")
				1173	s_lstrip = s.lstrip
				1174	for x in _RANGE_1000:
				1175	s_lstrip()
				1176
				1177	@bench('s="Hello!\\n"; s[:-1] if s[-1]=="\\n" else s',
				1178	'strip terminal newline', 1000)
				1179	def terminal_newline_if_else(STR):
				1180	s = STR("Hello!\n")
				1181	NL = STR("\n")
				1182	for x in _RANGE_1000:
				1183	s[:-1] if (s[-1] == NL) else s
				1184
				1185
				1186	# Strip multiple spaces or tabs
				1187
				1188	@bench('"Hello\\t \\t".strip()', 'strip terminal spaces and tabs', 1000)
				1189	def terminal_space_strip(STR):
				1190	s = STR("Hello\t \t!")
				1191	s_strip = s.strip
				1192	for x in _RANGE_1000:
				1193	s_strip()
				1194
				1195	@bench('"Hello\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
				1196	def terminal_space_rstrip(STR):
				1197	s = STR("Hello!\t \t")
				1198	s_rstrip = s.rstrip
				1199	for x in _RANGE_1000:
				1200	s_rstrip()
				1201
				1202	@bench('"\\t \\tHello".rstrip()', 'strip terminal spaces and tabs', 1000)
				1203	def terminal_space_lstrip(STR):
				1204	s = STR("\t \tHello!")
				1205	s_lstrip = s.lstrip
				1206	for x in _RANGE_1000:
				1207	s_lstrip()
				1208
				1209
				1210	#### replace
				1211	@bench('"This is a test".replace(" ", "\\t")', 'replace single character',
				1212	1000)
				1213	def replace_single_character(STR):
				1214	s = STR("This is a test!")
				1215	from_str = STR(" ")
				1216	to_str = STR("\t")
				1217	s_replace = s.replace
				1218	for x in _RANGE_1000:
				1219	s_replace(from_str, to_str)
				1220
				1221	@uses_re
				1222	@bench('re.sub(" ", "\\t", "This is a test"', 'replace single character',
				1223	1000)
				1224	def replace_single_character_re(STR):
				1225	s = STR("This is a test!")
				1226	pat = re.compile(STR(" "))
				1227	to_str = STR("\t")
				1228	pat_sub = pat.sub
				1229	for x in _RANGE_1000:
				1230	pat_sub(to_str, s)
				1231
				1232	@bench('"...text.with.2000.lines...replace("\\n", " ")',
				1233	'replace single character, big string', 10)
				1234	def replace_single_character_big(STR):
				1235	s = _get_2000_lines(STR)
				1236	from_str = STR("\n")
				1237	to_str = STR(" ")
				1238	s_replace = s.replace
				1239	for x in _RANGE_10:
				1240	s_replace(from_str, to_str)
				1241
				1242	@uses_re
				1243	@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
				1244	'replace single character, big string', 10)
				1245	def replace_single_character_big_re(STR):
				1246	s = _get_2000_lines(STR)
				1247	pat = re.compile(STR("\n"))
				1248	to_str = STR(" ")
				1249	pat_sub = pat.sub
				1250	for x in _RANGE_10:
				1251	pat_sub(to_str, s)
				1252
				1253
				1254	@bench('dna.replace("ATC", "ATT")',
				1255	'replace multiple characters, dna', 10)
				1256	def replace_multiple_characters_dna(STR):
				1257	seq = _get_dna(STR)
				1258	from_str = STR("ATC")
				1259	to_str = STR("ATT")
				1260	seq_replace = seq.replace
				1261	for x in _RANGE_10:
				1262	seq_replace(from_str, to_str)
				1263
				1264	# This increases the character count
				1265	@bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
				1266	'replace and expand multiple characters, big string', 10)
				1267	def replace_multiple_character_big(STR):
				1268	s = _get_2000_lines(STR)
				1269	from_str = STR("\n")
				1270	to_str = STR("\r\n")
				1271	s_replace = s.replace
				1272	for x in _RANGE_10:
				1273	s_replace(from_str, to_str)
				1274
				1275
				1276	# This decreases the character count
				1277	@bench('"When shall we three meet again?".replace("ee", "")',
				1278	'replace/remove multiple characters', 1000)
				1279	def replace_multiple_character_remove(STR):
				1280	s = STR("When shall we three meet again?")
				1281	from_str = STR("ee")
				1282	to_str = STR("")
				1283	s_replace = s.replace
				1284	for x in _RANGE_1000:
				1285	s_replace(from_str, to_str)
				1286
				1287
				1288	big_s = "A" + ("Z"1281024)
				1289	big_s_bytes = bytes_from_str(big_s)
				1290	big_s_unicode = unicode_from_str(big_s)
				1291	def _get_big_s(STR):
				1292	if STR is UNICODE: return big_s_unicode
				1293	if STR is BYTES: return big_s_bytes
				1294	raise AssertionError
				1295
				1296	# The older replace implementation counted all matches in
				1297	# the string even when it only neeed to make one replacement.
				1298	@bench('("A" + ("Z"1281024)).replace("A", "BB", 1)',
				1299	'quick replace single character match', 10)
				1300	def quick_replace_single_match(STR):
				1301	s = _get_big_s(STR)
				1302	from_str = STR("A")
				1303	to_str = STR("BB")
				1304	s_replace = s.replace
				1305	for x in _RANGE_10:
				1306	s_replace(from_str, to_str, 1)
				1307
				1308	@bench('("A" + ("Z"1281024)).replace("AZZ", "BBZZ", 1)',
				1309	'quick replace multiple character match', 10)
				1310	def quick_replace_multiple_match(STR):
				1311	s = _get_big_s(STR)
				1312	from_str = STR("AZZ")
				1313	to_str = STR("BBZZ")
				1314	s_replace = s.replace
				1315	for x in _RANGE_10:
				1316	s_replace(from_str, to_str, 1)
				1317
				1318
				1319	####
				1320
				1321	# CCP does a lot of this, for internationalisation of ingame messages.
				1322	_format = "The %(thing)s is %(place)s the %(location)s."
				1323	_format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", }
				1324	_format_bytes = bytes_from_str(_format)
				1325	_format_unicode = unicode_from_str(_format)
				1326	_format_dict_bytes = dict((bytes_from_str(k), bytes_from_str(v)) for (k,v) in _format_dict.items())
				1327	_format_dict_unicode = dict((unicode_from_str(k), unicode_from_str(v)) for (k,v) in _format_dict.items())
				1328
				1329	def _get_format(STR):
				1330	if STR is UNICODE:
				1331	return _format_unicode
				1332	if STR is BYTES:
				1333	if sys.version_info >= (3,):
				1334	raise UnsupportedType
				1335	return _format_bytes
				1336	raise AssertionError
				1337
				1338	def _get_format_dict(STR):
				1339	if STR is UNICODE:
				1340	return _format_dict_unicode
				1341	if STR is BYTES:
				1342	if sys.version_info >= (3,):
				1343	raise UnsupportedType
				1344	return _format_dict_bytes
				1345	raise AssertionError
				1346
				1347	# Formatting.
				1348	@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
				1349	'formatting a string type with a dict', 1000)
				1350	def format_with_dict(STR):
				1351	s = _get_format(STR)
				1352	d = _get_format_dict(STR)
				1353	for x in _RANGE_1000:
				1354	s % d
				1355
				1356
				1357	#### Upper- and lower- case conversion
				1358
				1359	@bench('("Where in the world is Carmen San Deigo?"*10).lower()',
				1360	"case conversion -- rare", 1000)
				1361	def lower_conversion_rare(STR):
				1362	s = STR("Where in the world is Carmen San Deigo?"*10)
				1363	s_lower = s.lower
				1364	for x in _RANGE_1000:
				1365	s_lower()
				1366
				1367	@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
				1368	"case conversion -- dense", 1000)
				1369	def lower_conversion_dense(STR):
				1370	s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
				1371	s_lower = s.lower
				1372	for x in _RANGE_1000:
				1373	s_lower()
				1374
				1375
				1376	@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
				1377	"case conversion -- rare", 1000)
				1378	def upper_conversion_rare(STR):
				1379	s = STR("Where in the world is Carmen San Deigo?"*10)
				1380	s_upper = s.upper
				1381	for x in _RANGE_1000:
				1382	s_upper()
				1383
				1384	@bench('("where in the world is carmen san deigo?"*10).upper()',
				1385	"case conversion -- dense", 1000)
				1386	def upper_conversion_dense(STR):
				1387	s = STR("where in the world is carmen san deigo?"*10)
				1388	s_upper = s.upper
				1389	for x in _RANGE_1000:
				1390	s_upper()
				1391
				1392
				1393	# end of benchmarks
				1394
				1395	#################
				1396
				1397	class BenchTimer(timeit.Timer):
				1398	def best(self, repeat=1):
				1399	for i in range(1, 10):
				1400	number = 10**i
				1401	x = self.timeit(number)
				1402	if x > 0.02:
				1403	break
				1404	times = [x]
				1405	for i in range(1, repeat):
				1406	times.append(self.timeit(number))
				1407	return min(times) / number
				1408
				1409	def main():
				1410	(options, test_names) = parser.parse_args()
				1411	if options.bytes_only and options.unicode_only:
				1412	raise SystemExit("Only one of --8-bit and --unicode are allowed")
				1413
				1414	bench_functions = []
				1415	for (k,v) in globals().items():
				1416	if hasattr(v, "is_bench"):
				1417	if test_names:
				1418	for name in test_names:
				1419	if name in v.group:
				1420	break
				1421	else:
				1422	# Not selected, ignore
				1423	continue
				1424	if options.skip_re and hasattr(v, "uses_re"):
				1425	continue
				1426
				1427	bench_functions.append( (v.group, k, v) )
				1428	bench_functions.sort()
				1429
				1430	p("bytes\tunicode")
				1431	p("(in ms)\t(in ms)\t%\tcomment")
				1432
				1433	bytes_total = uni_total = 0.0
				1434
				1435	for title, group in itertools.groupby(bench_functions,
				1436	operator.itemgetter(0)):
				1437	# Flush buffer before each group
				1438	sys.stdout.flush()
				1439	p("="*10, title)
				1440	for (_, k, v) in group:
				1441	if hasattr(v, "is_bench"):
				1442	bytes_time = 0.0
				1443	bytes_time_s = " - "
				1444	if not options.unicode_only:
				1445	try:
				1446	bytes_time = BenchTimer("__main__.%s(__main__.BYTES)" % (k,),
				1447	"import __main__").best(REPEAT)
				1448	bytes_time_s = "%.2f" % (1000 * bytes_time)
				1449	bytes_total += bytes_time
				1450	except UnsupportedType:
				1451	bytes_time_s = "N/A"
				1452	uni_time = 0.0
				1453	uni_time_s = " - "
				1454	if not options.bytes_only:
				1455	try:
				1456	uni_time = BenchTimer("__main__.%s(__main__.UNICODE)" % (k,),
				1457	"import __main__").best(REPEAT)
				1458	uni_time_s = "%.2f" % (1000 * uni_time)
				1459	uni_total += uni_time
				1460	except UnsupportedType:
				1461	uni_time_s = "N/A"
				1462	try:
				1463	average = bytes_time/uni_time
				1464	except (TypeError, ZeroDivisionError):
				1465	average = 0.0
				1466	p("%s\t%s\t%.1f\t%s (*%d)" % (
				1467	bytes_time_s, uni_time_s, 100.*average,
				1468	v.comment, v.repeat_count))
				1469
				1470	if bytes_total == uni_total == 0.0:
				1471	p("That was zippy!")
				1472	else:
				1473	try:
				1474	ratio = bytes_total/uni_total
				1475	except ZeroDivisionError:
				1476	ratio = 0.0
				1477	p("%.2f\t%.2f\t%.1f\t%s" % (
				1478	1000bytes_total, 1000uni_total, 100.*ratio,
				1479	"TOTAL"))
				1480
# Script entry point: run the benchmarks when executed as the main program.
if __name__ == "__main__":
    main()