#!/usr/bin/env python
# -*- mode: python -*-
# $Id$

# Re test suite and benchmark suite v1.5

# The 3 possible outcomes for each pattern.  They are used as element 2
# of every entry in the `tests` table:
#   SUCCEED      -- the pattern is expected to match the test string
#   FAIL         -- the pattern is expected not to match
#   SYNTAX_ERROR -- the pattern is expected to be rejected as malformed
SUCCEED, FAIL, SYNTAX_ERROR = range(3)
9
# Benchmark suite (needs expansion)
#
# The benchmark suite does not test correctness, just speed.  The
# first element of each tuple is the regex pattern; the second is a
# string to match it against.  The benchmarking code will embed the
# second string inside several sizes of padding, to test how regex
# matching performs on large strings.

benchmarks = [

    # test common prefix
    ('Python|Perl', 'Perl'),            # Alternation
    ('(Python|Perl)', 'Perl'),          # Grouped alternation

    ('Python|Perl|Tcl', 'Perl'),        # Alternation
    ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation

    ('(Python)\\1', 'PythonPython'),    # Backreference
    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'),  # Disable the fastmap optimization
    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'),   # A few sets

    ('Python', 'Python'),               # Simple text literal
    ('.*Python', 'Python'),             # Bad text literal
    ('.*Python.*', 'Python'),           # Worse text literal
    ('.*(Python)', 'Python'),           # Bad text literal with grouping

]
37
# Test suite (for verifying correctness)
#
# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
# complete tuple are:
# element 0: a string containing the pattern
#         1: the string to match against the pattern
#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match), and "g1", "g2", ...
#            up to "g99" contain the contents of each group, or the
#            string 'None' if the group wasn't given a value, or the
#            string 'Error' if the group index was out of range;
#            also "groups", the return value of m.group() (a tuple).
#            Named groups are also referenced by their own name (see the
#            symbolic-group tests below, which use "id").
#         4: The expected result of evaluating the expression.
#            If the two don't match, an error is reported.
#
# If the regex isn't expected to work, the latter two elements can be omitted.
#
# Note on quoting: many patterns are deliberately written as ordinary
# (non-raw) strings with doubled backslashes, while others are raw strings.
# Both spellings are kept on purpose; do not normalise one into the other
# without checking that the resulting string value is unchanged.

tests = [
    # Test ?P< and ?P= extensions
    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Contains an illegal char

    # Same tests, for the ?P= form
    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=foo_124)', 'aa', SYNTAX_ERROR),  # Backref to undefined group

    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),

    # Test octal escapes
    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # Test \0 is handled everywhere
    (r'\0', '\0', SUCCEED, 'found', '\0'),
    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
    (r'[^a\0]', '\0', FAIL),

    # Test various letter escapes
    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    # NOTE: not an error under PCRE/PRE:
    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
    (r'\x00f', '\017', FAIL, 'found', chr(15)),
    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),

    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),

    # Test that . only matches \n in DOTALL mode
    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
    ('a.b', 'a\nb', FAIL),
    ('a.*b', 'acc\nccb', FAIL),
    ('a.{4,5}b', 'acc\nccb', FAIL),
    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
    ('', '', SUCCEED, 'found', ''),    # Empty pattern
    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
    # NOTE: not an error under PCRE/PRE:
    # ('a[b-]', 'a-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a\\', '-', SYNTAX_ERROR),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    # Pattern is a[\]]b; the backslash is doubled because this is a
    # non-raw string and "\]" is not a recognised string escape.
    ('a[\\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
    ('\\by\\b', 'xy', FAIL),
    ('\\by\\b', 'yz', FAIL),
    ('\\by\\b', 'xyz', FAIL),
    ('x\\b', 'xyz', FAIL),
    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
    ('z\\B', 'xyz', FAIL),
    ('\\Bx', 'xyz', FAIL),
    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
    ('\\By\\B', 'xy', FAIL),
    ('\\By\\B', 'yz', FAIL),
    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('$b', 'b', FAIL),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
    ('^(a+).\\1$', 'aaaa', FAIL),
    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
    ('([abc]*)x', 'abc', FAIL),
    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),

    # Test symbolic groups

    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),

    # Test octal escapes/memory references

    ('\\1', 'a', SYNTAX_ERROR),
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # All tests from Perl

    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab{1,}bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{4,5}bc', 'abbbbc', FAIL),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found', ''),
    ('$', 'abc', SUCCEED, 'found', ''),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-a]', '-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('*a', '-', SYNTAX_ERROR),
    ('(*)b', '-', SYNTAX_ERROR),
    ('$b', 'b', FAIL),
    ('a\\', '-', SYNTAX_ERROR),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a**', '-', SYNTAX_ERROR),
    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
    # Python does not have the same rules for \\41 so this is a syntax error
    #    ('((((((((((a))))))))))\\41', 'aa', FAIL),
    #    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'XBC', FAIL),
    ('(?i)abc', 'AXC', FAIL),
    ('(?i)abc', 'ABX', FAIL),
    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab+bc', 'ABC', FAIL),
    ('(?i)ab+bc', 'ABQ', FAIL),
    ('(?i)ab{1,}bc', 'ABQ', FAIL),
    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab??bc', 'ABBBBC', FAIL),
    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABCC', FAIL),
    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'AABC', FAIL),
    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
    ('(?i)a.*c', 'AXYZD', FAIL),
    ('(?i)a[bc]d', 'ABC', FAIL),
    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
    ('(?i)a[b-d]e', 'ABD', FAIL),
    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
    ('(?i)a[]b', '-', SYNTAX_ERROR),
    ('(?i)a[', '-', SYNTAX_ERROR),
    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
    ('(?i)a[^bc]d', 'ABD', FAIL),
    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)a[^-b]c', 'A-C', FAIL),
    ('(?i)a[^]b]c', 'A]C', FAIL),
    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)*a', '-', SYNTAX_ERROR),
    ('(?i)(*)b', '-', SYNTAX_ERROR),
    ('(?i)$b', 'B', FAIL),
    ('(?i)a\\', '-', SYNTAX_ERROR),
    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
    ('(?i)abc)', '-', SYNTAX_ERROR),
    ('(?i)(abc', '-', SYNTAX_ERROR),
    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a**', '-', SYNTAX_ERROR),
    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
    ('(?i))(', '-', SYNTAX_ERROR),
    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
    ('(?i)abc', '', FAIL),
    ('(?i)a*', '', SUCCEED, 'found', ''),
    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
    ('(?i)multiple words of text', 'UH-UH', FAIL),
    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
    ('(?i)[k]', 'AB', FAIL),
    #    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
    #    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),

    # Comments using the (?#...) syntax

    ('w(?# comment', 'w', SYNTAX_ERROR),
    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),

    # Check odd placement of embedded pattern modifiers

    # not an error under PCRE/PRE:
    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
    # ('w(?i)', 'W', SYNTAX_ERROR),

    # Comments using the x embedded pattern modifier

    ("""(?x)w# comment 1
        x y
        # comment 2
        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),

    # using the m embedded pattern modifier

    # The continuation lines of these triple-quoted subjects must stay at
    # column 0: any leading whitespace would become part of the test string.
    ('^abc', """jkl
abc
xyz""", FAIL),
    ('(?m)^abc', """jkl
abc
xyz""", SUCCEED, 'found', 'abc'),

    ('(?m)abc$', """jkl
xyzabc
123""", SUCCEED, 'found', 'abc'),

    # using the s embedded pattern modifier

    ('a.b', 'a\nb', FAIL),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    # test \w, etc. both inside and outside character classes

    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
    # not an error under PCRE/PRE:
    # ('[\\d-x]', '-', SYNTAX_ERROR),
    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),

    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ff', '\377', FAIL, 'found', chr(255)),
    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    # Deliberately NOT a raw string: the control characters are literal in
    # the pattern and only the final "\g" reaches the regex engine as an
    # escape.  The backslash is doubled because "\g" is not a recognised
    # string escape; the resulting pattern text is unchanged.
    ('\t\n\v\r\f\a\\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),

    #
    # post-1.5.2 additions

    # xmllib problem
    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
    # bug 111869 (PRE/PCRE fails on this one, SRE doesn't)
    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
    # bug 112468
    ('(', '', SYNTAX_ERROR),
    ('[\\41]', '!', SUCCEED, 'found', '!'),
]