blob: 953e4fdffe1ea9ce8f41545a7ad9572cfcbfe38e [file] [log] [blame]
Guido van Rossum337c6d41997-07-15 18:42:58 +00001#!/usr/bin/env python
2# -*- mode: python -*-
Guido van Rossum337c6d41997-07-15 18:42:58 +00003
Guido van Rossum8430c581998-04-03 21:47:12 +00004# Re test suite and benchmark suite v1.5
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005
# The 3 possible outcomes for each pattern.  They are numbered 0, 1 and 2
# and are used as the "expected result" field of each test tuple.
SUCCEED, FAIL, SYNTAX_ERROR = range(3)
8
# Benchmark suite (needs expansion)
#
# The benchmark suite does not test correctness, just speed.  The
# first element of each tuple is the regex pattern; the second is a
# string to match it against.  The benchmarking code will embed the
# second string inside several sizes of padding, to test how regex
# matching performs on large strings.

benchmarks = [

    # test common prefix
    ('Python|Perl', 'Perl'),            # Alternation
    ('(Python|Perl)', 'Perl'),          # Grouped alternation

    ('Python|Perl|Tcl', 'Perl'),        # Alternation
    ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation

    ('(Python)\\1', 'PythonPython'),    # Backreference
    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'),  # Disable the fastmap optimization
    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'),   # A few sets

    ('Python', 'Python'),               # Simple text literal
    ('.*Python', 'Python'),             # Bad text literal
    ('.*Python.*', 'Python'),           # Worse text literal
    ('.*(Python)', 'Python'),           # Bad text literal with grouping

]
36
# Test suite (for verifying correctness)
#
# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
# complete tuple are:
# element 0: a string containing the pattern
#         1: the string to match against the pattern
#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match), and "g1", "g2", ...
#            up to "g99" contain the contents of each group, or the
#            string 'None' if the group wasn't given a value, or the
#            string 'Error' if the group index was out of range;
#            also "groups", the return value of m.group() (a tuple).
#         4: The expected result of evaluating the expression.
#            If the two don't match, an error is reported.
#
# If the regex isn't expected to work, the latter two elements can be omitted.
55
Fredrik Lundh2643b552000-08-08 16:52:51 +000056tests = [
Guido van Rossumdfa67901997-12-08 17:12:06 +000057 # Test ?P< and ?P= extensions
58 ('(?P<foo_123', '', SYNTAX_ERROR), # Unterminated group identifier
59 ('(?P<1>a)', '', SYNTAX_ERROR), # Begins with a digit
60 ('(?P<!>a)', '', SYNTAX_ERROR), # Begins with an illegal char
61 ('(?P<foo!>a)', '', SYNTAX_ERROR), # Begins with an illegal char
62
63 # Same tests, for the ?P= form
64 ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
65 ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
66 ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
67 ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR), # Backref to undefined group
68
69 ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
70 ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
Guido van Rossum8430c581998-04-03 21:47:12 +000071
Guido van Rossumdfa67901997-12-08 17:12:06 +000072 # Test octal escapes
Guido van Rossum8430c581998-04-03 21:47:12 +000073 ('\\1', 'a', SYNTAX_ERROR), # Backreference
74 ('[\\1]', '\1', SUCCEED, 'found', '\1'), # Character
Guido van Rossumdfa67901997-12-08 17:12:06 +000075 ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
76 ('\\141', 'a', SUCCEED, 'found', 'a'),
77 ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
78
Guido van Rossum8430c581998-04-03 21:47:12 +000079 # Test \0 is handled everywhere
Guido van Rossumdfa67901997-12-08 17:12:06 +000080 (r'\0', '\0', SUCCEED, 'found', '\0'),
Guido van Rossumdfa67901997-12-08 17:12:06 +000081 (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
82 (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
83 (r'[^a\0]', '\0', FAIL),
Fredrik Lundh2643b552000-08-08 16:52:51 +000084
Guido van Rossumdfa67901997-12-08 17:12:06 +000085 # Test various letter escapes
86 (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
87 (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +000088 # NOTE: not an error under PCRE/PRE:
89 # (r'\u', '', SYNTAX_ERROR), # A Perl escape
Guido van Rossumdfa67901997-12-08 17:12:06 +000090 (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
91 (r'\xff', '\377', SUCCEED, 'found', chr(255)),
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +000092 # new \x semantics
93 (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
94 (r'\x00f', '\017', FAIL, 'found', chr(15)),
95 (r'\x00fe', '\376', FAIL, 'found', chr(254)),
96 # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
97 # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
98 # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
Guido van Rossumdfa67901997-12-08 17:12:06 +000099
Fredrik Lundh2643b552000-08-08 16:52:51 +0000100 (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
Guido van Rossumdfa67901997-12-08 17:12:06 +0000101 SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
Fredrik Lundh2643b552000-08-08 16:52:51 +0000102
Guido van Rossumdfa67901997-12-08 17:12:06 +0000103 # Test that . only matches \n in DOTALL mode
104 ('a.b', 'acb', SUCCEED, 'found', 'acb'),
105 ('a.b', 'a\nb', FAIL),
106 ('a.*b', 'acc\nccb', FAIL),
107 ('a.{4,5}b', 'acc\nccb', FAIL),
108 ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
109 ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
110 ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
111 ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
112 ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
113
Guido van Rossum8430c581998-04-03 21:47:12 +0000114 (')', '', SYNTAX_ERROR), # Unmatched right bracket
115 ('', '', SUCCEED, 'found', ''), # Empty pattern
Guido van Rossum337c6d41997-07-15 18:42:58 +0000116 ('abc', 'abc', SUCCEED, 'found', 'abc'),
117 ('abc', 'xbc', FAIL),
118 ('abc', 'axc', FAIL),
119 ('abc', 'abx', FAIL),
120 ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
121 ('abc', 'ababc', SUCCEED, 'found', 'abc'),
122 ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
123 ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
124 ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
125 ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
126 ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
127 ('ab+bc', 'abc', FAIL),
128 ('ab+bc', 'abq', FAIL),
129 ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
130 ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
131 ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
132 ('ab?bc', 'abbbbc', FAIL),
133 ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
134 ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
135 ('^abc$', 'abcc', FAIL),
136 ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
137 ('^abc$', 'aabc', FAIL),
138 ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
139 ('^', 'abc', SUCCEED, 'found+"-"', '-'),
140 ('$', 'abc', SUCCEED, 'found+"-"', '-'),
141 ('a.c', 'abc', SUCCEED, 'found', 'abc'),
142 ('a.c', 'axc', SUCCEED, 'found', 'axc'),
143 ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
144 ('a.*c', 'axyzd', FAIL),
145 ('a[bc]d', 'abc', FAIL),
146 ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
147 ('a[b-d]e', 'abd', FAIL),
148 ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
149 ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000150 ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
Guido van Rossum337c6d41997-07-15 18:42:58 +0000151 ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +0000152 # NOTE: not an error under PCRE/PRE:
153 # ('a[b-]', 'a-', SYNTAX_ERROR),
Guido van Rossum337c6d41997-07-15 18:42:58 +0000154 ('a[]b', '-', SYNTAX_ERROR),
155 ('a[', '-', SYNTAX_ERROR),
156 ('a\\', '-', SYNTAX_ERROR),
157 ('abc)', '-', SYNTAX_ERROR),
158 ('(abc', '-', SYNTAX_ERROR),
159 ('a]', 'a]', SUCCEED, 'found', 'a]'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000160 ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
Guido van Rossum337c6d41997-07-15 18:42:58 +0000161 ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
162 ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
163 ('a[^bc]d', 'abd', FAIL),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000164 ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
165 ('a[^-b]c', 'a-c', FAIL),
166 ('a[^]b]c', 'a]c', FAIL),
167 ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
Guido van Rossum337c6d41997-07-15 18:42:58 +0000168 ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
169 ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
170 ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
171 ('\\by\\b', 'xy', FAIL),
172 ('\\by\\b', 'yz', FAIL),
173 ('\\by\\b', 'xyz', FAIL),
Guido van Rossumcf005051997-08-15 15:44:58 +0000174 ('x\\b', 'xyz', FAIL),
175 ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
176 ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
177 ('z\\B', 'xyz', FAIL),
178 ('\\Bx', 'xyz', FAIL),
179 ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
180 ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
181 ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
182 ('\\By\\B', 'xy', FAIL),
183 ('\\By\\B', 'yz', FAIL),
184 ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
185 ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
186 ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
Guido van Rossum337c6d41997-07-15 18:42:58 +0000187 ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
188 ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
189 ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
190 ('$b', 'b', FAIL),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000191 ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
Guido van Rossum337c6d41997-07-15 18:42:58 +0000192 ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
193 ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
194 ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
195 ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
196 ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
197 ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
198 ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
199 ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
200 ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
201 (')(', '-', SYNTAX_ERROR),
202 ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
203 ('abc', '', FAIL),
204 ('a*', '', SUCCEED, 'found', ''),
205 ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
206 ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
207 ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
208 ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
209 ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
210 ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
211 ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000212 ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
Guido van Rossum337c6d41997-07-15 18:42:58 +0000213 ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
214 ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
215 ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
216 ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
217 ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
218 ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
219 ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
220 ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
221 ('a[bcd]+dcdcde', 'adcdcde', FAIL),
222 ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
223 ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
224 ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
225 ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
226 ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
227 ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
228 ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
229 ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
230 ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
231 ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
232 ('multiple words of text', 'uh-uh', FAIL),
233 ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
234 ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
235 ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
236 ('[k]', 'ab', FAIL),
237 ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
238 ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
239 ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
240 ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
241 ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
242 ('^(a+).\\1$', 'aaaa', FAIL),
243 ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
244 ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
245 ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
246 ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
247 ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
248 ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
249 ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
250 ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
251 ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
252 ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
253 ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
254 ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
255 ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
256 ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
257 ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
258 ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
259 ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
260 ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
261 ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
262 ('([abc]*)x', 'abc', FAIL),
263 ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
264 ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
265
266 # Test symbolic groups
267
268 ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
269 ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
270 ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
271 ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
272
273 # Test octal escapes/memory references
274
275 ('\\1', 'a', SYNTAX_ERROR),
276 ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
277 ('\\141', 'a', SUCCEED, 'found', 'a'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000278 ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
Fredrik Lundh2643b552000-08-08 16:52:51 +0000279
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000280 # All tests from Perl
281
282 ('abc', 'abc', SUCCEED, 'found', 'abc'),
283 ('abc', 'xbc', FAIL),
284 ('abc', 'axc', FAIL),
285 ('abc', 'abx', FAIL),
286 ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
287 ('abc', 'ababc', SUCCEED, 'found', 'abc'),
288 ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
289 ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
290 ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
291 ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
292 ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
293 ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
294 ('ab+bc', 'abc', FAIL),
295 ('ab+bc', 'abq', FAIL),
296 ('ab{1,}bc', 'abq', FAIL),
297 ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
298 ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
299 ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
300 ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
301 ('ab{4,5}bc', 'abbbbc', FAIL),
302 ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
303 ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
304 ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
305 ('ab?bc', 'abbbbc', FAIL),
306 ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
307 ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
308 ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
309 ('^abc$', 'abcc', FAIL),
310 ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
311 ('^abc$', 'aabc', FAIL),
312 ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
313 ('^', 'abc', SUCCEED, 'found', ''),
314 ('$', 'abc', SUCCEED, 'found', ''),
315 ('a.c', 'abc', SUCCEED, 'found', 'abc'),
316 ('a.c', 'axc', SUCCEED, 'found', 'axc'),
317 ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
318 ('a.*c', 'axyzd', FAIL),
319 ('a[bc]d', 'abc', FAIL),
320 ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
321 ('a[b-d]e', 'abd', FAIL),
322 ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
323 ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
324 ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
325 ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
326 ('a[b-a]', '-', SYNTAX_ERROR),
327 ('a[]b', '-', SYNTAX_ERROR),
328 ('a[', '-', SYNTAX_ERROR),
329 ('a]', 'a]', SUCCEED, 'found', 'a]'),
330 ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
331 ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
332 ('a[^bc]d', 'abd', FAIL),
333 ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
334 ('a[^-b]c', 'a-c', FAIL),
335 ('a[^]b]c', 'a]c', FAIL),
336 ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
337 ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
338 ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
339 ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
340 ('*a', '-', SYNTAX_ERROR),
341 ('(*)b', '-', SYNTAX_ERROR),
342 ('$b', 'b', FAIL),
343 ('a\\', '-', SYNTAX_ERROR),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000344 ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000345 ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
346 ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
347 ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
348 ('abc)', '-', SYNTAX_ERROR),
349 ('(abc', '-', SYNTAX_ERROR),
350 ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
351 ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
352 ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
353 ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000354 ('a**', '-', SYNTAX_ERROR),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000355 ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
356 ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
357 ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
358 ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
359 ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
360 ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
361 ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
362 (')(', '-', SYNTAX_ERROR),
363 ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
364 ('abc', '', FAIL),
365 ('a*', '', SUCCEED, 'found', ''),
366 ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
367 ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
368 ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
369 ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
370 ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
371 ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
372 ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
373 ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
374 ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
375 ('^(ab|cd)e', 'abcde', FAIL),
376 ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
377 ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
378 ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
379 ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
380 ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
381 ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
382 ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
383 ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
384 ('a[bcd]+dcdcde', 'adcdcde', FAIL),
385 ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
386 ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
387 ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000388 ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
389 ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000390 ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
391 ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
392 ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000393 ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000394 ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
395 ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000396# Python does not have the same rules for \\41 so this is a syntax error
397# ('((((((((((a))))))))))\\41', 'aa', FAIL),
398# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000399 ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
Guido van Rossum95e80531997-08-13 22:34:14 +0000400 ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000401 ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
402 ('multiple words of text', 'uh-uh', FAIL),
403 ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
404 ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000405 ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000406 ('[k]', 'ab', FAIL),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000407 ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
408 ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
409 ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
410 ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
411 ('(?i)abc', 'XBC', FAIL),
412 ('(?i)abc', 'AXC', FAIL),
413 ('(?i)abc', 'ABX', FAIL),
414 ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
415 ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
416 ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
417 ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
418 ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
419 ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
420 ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
421 ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
422 ('(?i)ab+bc', 'ABC', FAIL),
423 ('(?i)ab+bc', 'ABQ', FAIL),
424 ('(?i)ab{1,}bc', 'ABQ', FAIL),
425 ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
426 ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
427 ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
428 ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
429 ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
430 ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
431 ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
432 ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
433 ('(?i)ab??bc', 'ABBBBC', FAIL),
434 ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
435 ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
436 ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
437 ('(?i)^abc$', 'ABCC', FAIL),
438 ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
439 ('(?i)^abc$', 'AABC', FAIL),
440 ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
441 ('(?i)^', 'ABC', SUCCEED, 'found', ''),
442 ('(?i)$', 'ABC', SUCCEED, 'found', ''),
443 ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
444 ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
445 ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
446 ('(?i)a.*c', 'AXYZD', FAIL),
447 ('(?i)a[bc]d', 'ABC', FAIL),
448 ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
449 ('(?i)a[b-d]e', 'ABD', FAIL),
450 ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
451 ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
452 ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
453 ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
454 ('(?i)a[b-a]', '-', SYNTAX_ERROR),
455 ('(?i)a[]b', '-', SYNTAX_ERROR),
456 ('(?i)a[', '-', SYNTAX_ERROR),
457 ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
458 ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
459 ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
460 ('(?i)a[^bc]d', 'ABD', FAIL),
461 ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
462 ('(?i)a[^-b]c', 'A-C', FAIL),
463 ('(?i)a[^]b]c', 'A]C', FAIL),
464 ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
465 ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
466 ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
467 ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
468 ('(?i)*a', '-', SYNTAX_ERROR),
469 ('(?i)(*)b', '-', SYNTAX_ERROR),
470 ('(?i)$b', 'B', FAIL),
471 ('(?i)a\\', '-', SYNTAX_ERROR),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000472 ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000473 ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
474 ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
475 ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
476 ('(?i)abc)', '-', SYNTAX_ERROR),
477 ('(?i)(abc', '-', SYNTAX_ERROR),
478 ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
479 ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
480 ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
481 ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000482 ('(?i)a**', '-', SYNTAX_ERROR),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000483 ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
484 ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
485 ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
486 ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
487 ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
488 ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
489 ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
490 ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
491 ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000492 ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000493 ('(?i))(', '-', SYNTAX_ERROR),
494 ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
495 ('(?i)abc', '', FAIL),
496 ('(?i)a*', '', SUCCEED, 'found', ''),
497 ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
498 ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
499 ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
500 ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
501 ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
502 ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
503 ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
504 ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
505 ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
506 ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
507 ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
508 ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
509 ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
510 ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
511 ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
512 ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
513 ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
514 ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
515 ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
516 ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
517 ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
518 ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000519 ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
520 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000521 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
522 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
523 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000524 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000525 ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
526 ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000527 #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
528 #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000529 ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
530 ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
531 ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
532 ('(?i)multiple words of text', 'UH-UH', FAIL),
533 ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
534 ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000535 ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000536 ('(?i)[k]', 'AB', FAIL),
Guido van Rossumdfa67901997-12-08 17:12:06 +0000537# ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
538# ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000539 ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
540 ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
541 ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
Guido van Rossumdfa67901997-12-08 17:12:06 +0000542 ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
543 ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
544 ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000545 ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
546 ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
547 ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000548 ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000549 ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000550
551 # Comments using the (?#...) syntax
552
553 ('w(?# comment', 'w', SYNTAX_ERROR),
554 ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000555
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000556 # Check odd placement of embedded pattern modifiers
557
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +0000558 # not an error under PCRE/PRE:
559 ('w(?i)', 'W', SUCCEED, 'found', 'W'),
560 # ('w(?i)', 'W', SYNTAX_ERROR),
Fredrik Lundh2643b552000-08-08 16:52:51 +0000561
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000562 # Comments using the x embedded pattern modifier
563
564 ("""(?x)w# comment 1
565 x y
Fredrik Lundh2643b552000-08-08 16:52:51 +0000566 # comment 2
567 z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000568
569 # using the m embedded pattern modifier
570
571 ('^abc', """jkl
572abc
573xyz""", FAIL),
574 ('(?m)^abc', """jkl
575abc
576xyz""", SUCCEED, 'found', 'abc'),
577
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000578 ('(?m)abc$', """jkl
579xyzabc
580123""", SUCCEED, 'found', 'abc'),
Fredrik Lundh2643b552000-08-08 16:52:51 +0000581
Guido van Rossuma0e4c1b1997-07-17 14:52:48 +0000582 # using the s embedded pattern modifier
583
584 ('a.b', 'a\nb', FAIL),
585 ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000586
Guido van Rossum95e80531997-08-13 22:34:14 +0000587 # test \w, etc. both inside and outside character classes
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000588
589 ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
Guido van Rossum95e80531997-08-13 22:34:14 +0000590 ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000591 ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
Guido van Rossum95e80531997-08-13 22:34:14 +0000592 ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
Guido van Rossum06c0ec91997-07-17 22:36:39 +0000593 ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +0000594 # not an error under PCRE/PRE:
595 # ('[\\d-x]', '-', SYNTAX_ERROR),
Guido van Rossum95e80531997-08-13 22:34:14 +0000596 (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
597 (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
598
599 (r'\xff', '\377', SUCCEED, 'found', chr(255)),
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +0000600 # new \x semantics
Fredrik Lundh2e240442001-01-15 18:28:14 +0000601 (r'\x00ff', '\377', FAIL),
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +0000602 # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
Guido van Rossum95e80531997-08-13 22:34:14 +0000603 (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
604 ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
605 (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
Guido van Rossumdfa67901997-12-08 17:12:06 +0000606 (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000607
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +0000608 #
609 # post-1.5.2 additions
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000610
611 # xmllib problem
612 (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
Fredrik Lundh2e240442001-01-15 18:28:14 +0000613 # bug 110866: reference to undefined group
614 (r'((.)\1+)', '', SYNTAX_ERROR),
615 # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
Fredrik Lundh0c4fdba2000-08-31 22:57:55 +0000616 (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
Fredrik Lundhd11b5e52000-10-03 19:22:26 +0000617 # bug 112468: various expected syntax errors
Fredrik Lundh2e240442001-01-15 18:28:14 +0000618 (r'(', '', SYNTAX_ERROR),
619 (r'[\41]', '!', SUCCEED, 'found', '!'),
Fredrik Lundh13ac9922000-10-07 17:38:23 +0000620 # bug 114033: nothing to repeat
621 (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
Fredrik Lundhd11b5e52000-10-03 19:22:26 +0000622 # bug 115040: rescan if flags are modified inside pattern
623 (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
624 # bug 115618: negative lookbehind
Fredrik Lundh65d4bc62000-10-03 16:29:23 +0000625 (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
Fredrik Lundh025468d2000-10-07 10:16:19 +0000626 # bug 116251: character class bug
627 (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
Fredrik Lundh2e240442001-01-15 18:28:14 +0000628 # bug 123769+127259: non-greedy backtracking bug
629 (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
630 (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
631 (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
632 # bug 127259: \Z shouldn't depend on multiline mode
633 (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
634 # bug 128899: uppercase literals under the ignorecase flag
635 (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
636 (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
637 (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
638 (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
Fredrik Lundhc0c7ee32001-02-18 21:04:48 +0000639 # bug 130748: ^* should be an error (nothing to repeat)
640 (r'^*', '', SYNTAX_ERROR),
Fredrik Lundh82b23072001-12-09 16:13:15 +0000641 # bug 133283: minimizing repeat problem
642 (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
643 # bug 477728: minimizing repeat problem
644 (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
645 # bug 483789: minimizing repeat problem
646 (r'a[^>]*?b', 'a>b', FAIL),
647 # bug 490573: minimizing repeat problem
648 (r'^a*?$', 'foo', FAIL),
Guido van Rossum16bd0ff1997-07-15 18:45:20 +0000649]
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000650
# Extra cases that need a unicode sample character.  The character is
# produced by eval()-ing a u'...' literal; when that evaluation raises
# SyntaxError the extra cases are simply not added to the table.
# NOTE(review): presumably this guards interpreters without unicode
# literal support -- confirm before relying on it.
try:
    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
except SyntaxError:
    # Literal could not be evaluated: leave the test table unchanged.
    pass
else:
    # bug 410271: \b broken under locales
    tests.append((r'\b.\b', 'a', SUCCEED, 'found', 'a'))
    tests.append((r'(?u)\b.\b', u, SUCCEED, 'found', u))