blob: 453ee211cd34a2d8403af4c46fc9ef60972c6fe8 [file] [log] [blame]
Fredrik Lundh143328b2000-09-02 11:03:34 +00001# SRE test harness for the Python regression suite
2
3# this is based on test_re.py, but uses a test function instead
4# of all those asserts
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00005
6import sys
7sys.path=['.']+sys.path
8
Barry Warsaw04f357c2002-07-23 19:04:11 +00009from test.test_support import verbose, TestFailed, have_unicode
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +000010import sre
Fredrik Lundhf2989b22001-02-18 12:05:16 +000011import sys, os, string, traceback
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +000012
Fredrik Lundh143328b2000-09-02 11:03:34 +000013#
14# test support
15
16def test(expression, result, exception=None):
17 try:
18 r = eval(expression)
19 except:
20 if exception:
21 if not isinstance(sys.exc_value, exception):
22 print expression, "FAILED"
23 # display name, not actual value
24 if exception is sre.error:
25 print "expected", "sre.error"
26 else:
27 print "expected", exception.__name__
28 print "got", sys.exc_type.__name__, str(sys.exc_value)
29 else:
30 print expression, "FAILED"
31 traceback.print_exc(file=sys.stdout)
32 else:
33 if exception:
34 print expression, "FAILED"
35 if exception is sre.error:
36 print "expected", "sre.error"
37 else:
38 print "expected", exception.__name__
39 print "got result", repr(r)
40 else:
41 if r != result:
42 print expression, "FAILED"
43 print "expected", repr(result)
44 print "got result", repr(r)
45
if verbose:
    print 'Running tests on character literals'

# Each octal (\ooo) and hex (\xhh) escape is expected to match the
# character with that code, both on its own and when followed by one
# more literal character.  The expression strings refer to the loop
# variable i by name; test() looks it up when it evals the string.
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
    test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
    test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
    test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
    test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
    test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
    test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
# this escape is expected to be rejected with sre.error
test(r"""sre.match("\911", "")""", None, sre.error)
57
58#
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +000059# Misc tests from Tim Peters' re.doc
60
if verbose:
    print 'Running tests on sre.search and sre.match'

# search: x* is expected to match empty at offset 0, x+ at (1, 3)
test(r"""sre.search(r'x*', 'axx').span(0)""", (0, 0))
test(r"""sre.search(r'x*', 'axx').span()""", (0, 0))
test(r"""sre.search(r'x+', 'axx').span(0)""", (1, 3))
test(r"""sre.search(r'x+', 'axx').span()""", (1, 3))
test(r"""sre.search(r'x', 'aaa')""", None)

# match: same kind of checks, with span() and span(0) both exercised
test(r"""sre.match(r'a*', 'xxx').span(0)""", (0, 0))
test(r"""sre.match(r'a*', 'xxx').span()""", (0, 0))
test(r"""sre.match(r'x*', 'xxxa').span(0)""", (0, 3))
test(r"""sre.match(r'x*', 'xxxa').span()""", (0, 3))
test(r"""sre.match(r'a+', 'xxx')""", None)

# bug 113254
# (group 1 does not take part in the match; -1 positions are expected)
test(r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))

# bug described in patches 527371/672491
# (expected values of lastindex/lastgroup with optional and nested groups)
test(r"""sre.match(r'(a)?a','a').lastindex""", None)
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
test(r"""sre.match("(?P<a>a(b))", "ab").lastgroup""", 'a')
test(r"""sre.match("((a))", "a").lastindex""", 1)

# bug 545855 -- This pattern failed to cause a compile error as it
# should, instead provoking a TypeError.
test(r"""sre.compile('foo[a-')""", None, sre.error)

# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
# pattern '*?' on a long string.
test(r"""sre.match('.*?c', 10000*'ab'+'cd').end(0)""", 20001)
test(r"""sre.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0)""", 20003)
test(r"""sre.match('.*?cd', 20000*'abc'+'de').end(0)""", 60001)
# non-simple '*?' still recurses and hits the recursion limit
test(r"""sre.search('(a|b)*?c', 10000*'ab'+'cd').end(0)""", None, RuntimeError)

# bug 612074
# (a character class built from an escaped non-ASCII unicode character
# must compile; pat is read by name from the expression below)
pat=u"["+sre.escape(u"\u2039")+u"]"
test(r"""sre.compile(pat) and 1""", 1, None)
104
if verbose:
    print 'Running tests on sre.sub'

# with (?i), both the lower-case and the upper-case run are replaced
test(r"""sre.sub(r"(?i)b+", "x", "bbbb BBBB")""", 'x x')
Fredrik Lundh6f013982000-07-03 18:44:21 +0000109
def bump_num(matchobj):
    """Replacement callback: return the matched number plus one, as text.

    The whole match (group 0) is converted with int(), incremented and
    converted back with str(), so '08' becomes '9'.
    """
    return str(int(matchobj.group(0)) + 1)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000113
# callable replacement; the fourth argument (3) limits the number of
# substitutions
test(r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y')
test(r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y')

# text returned by a callable is used as is, while a template string
# has its escapes processed
test(r"""sre.sub(r'.', lambda m: r"\n", 'x')""", '\\n')
test(r"""sre.sub(r'.', r"\n", 'x')""", '\n')

# s is read by name from the expressions below
s = r"\1\1"

test(r"""sre.sub(r'(.)', s, 'x')""", 'xx')
test(r"""sre.sub(r'(.)', sre.escape(s), 'x')""", s)
test(r"""sre.sub(r'(.)', lambda m: s, 'x')""", s)

# \g<name> and \g<number> references in the template
test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')

# bug 449964: fails for group followed by other escape
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx')""", 'xx\bxx\b')

# escapes in the template: the expected value shows which ones are
# translated to control characters and which keep their backslash
test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

test(r"""sre.sub(r'^\s*', 'X', 'test')""", 'Xtest')

# qualified sub
test(r"""sre.sub(r'a', 'b', 'aaaaa')""", 'bbbbb')
test(r"""sre.sub(r'a', 'b', 'aaaaa', 1)""", 'baaaa')

# bug 114660
test(r"""sre.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')""", 'hello there')

# Test for sub() on escaped characters, see SF bug #449000
# (all four raw/non-raw combinations of pattern and replacement)
test(r"""sre.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
test(r"""sre.sub('\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
test(r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
test(r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')

# Test for empty sub() behaviour, see SF bug #462270
test(r"""sre.sub('x*', '-', 'abxd')""", '-a-b-d-')
test(r"""sre.sub('x+', '-', 'abxd')""", 'ab-d')
156
if verbose:
    print 'Running tests on symbolic references'

# Malformed or invalid group references in the replacement template.
# Every case is expected to raise; the result argument (None) is not
# compared by test() when an exception class is given.
test(r"""sre.sub(r'(?P<a>x)', '\g<a', 'xx')""", None, sre.error)
test(r"""sre.sub(r'(?P<a>x)', '\g<', 'xx')""", None, sre.error)
test(r"""sre.sub(r'(?P<a>x)', '\g', 'xx')""", None, sre.error)
test(r"""sre.sub(r'(?P<a>x)', '\g<a a>', 'xx')""", None, sre.error)
test(r"""sre.sub(r'(?P<a>x)', '\g<1a1>', 'xx')""", None, sre.error)
# an unknown group name is expected to give IndexError, not sre.error
test(r"""sre.sub(r'(?P<a>x)', '\g<ab>', 'xx')""", None, IndexError)
# references to a group that exists but did not take part in the match
test(r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", None, sre.error)
test(r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", None, sre.error)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000168
if verbose:
    print 'Running tests on sre.subn'

# subn is expected to return (new string, number of substitutions)
test(r"""sre.subn(r"(?i)b+", "x", "bbbb BBBB")""", ('x x', 2))
test(r"""sre.subn(r"b+", "x", "bbbb BBBB")""", ('x BBBB', 1))
test(r"""sre.subn(r"b+", "x", "xyz")""", ('xyz', 0))
# b* matches empty, so substitutions happen around every character
test(r"""sre.subn(r"b*", "x", "xyz")""", ('xxxyxzx', 4))
test(r"""sre.subn(r"b*", "x", "xyz", 2)""", ('xxxyz', 2))
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000177
if verbose:
    print 'Running tests on sre.split'

# plain, capturing and non-capturing separators; the expected lists
# show that captured separator text is included in the result
test(r"""sre.split(r":", ":a:b::c")""", ['', 'a', 'b', '', 'c'])
test(r"""sre.split(r":+", ":a:b:::")""", ['', 'a', 'b', ''])
test(r"""sre.split(r":*", ":a:b::c")""", ['', 'a', 'b', 'c'])
test(r"""sre.split(r"(:*)", ":a:b::c")""", ['', ':', 'a', ':', 'b', '::', 'c'])
test(r"""sre.split(r"(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c'])
test(r"""sre.split(r"(:)*", ":a:b::c")""", ['', ':', 'a', ':', 'b', ':', 'c'])
test(r"""sre.split(r"([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c'])
# with two alternative groups, the one that did not match yields None
test(r"""sre.split(r"(b)|(:+)", ":a:b::c")""",
     ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
test(r"""sre.split(r"(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c'])

# the third argument (2) limits the number of splits
test(r"""sre.split(r":", ":a:b::c", 2)""", ['', 'a', 'b::c'])
test(r"""sre.split(r':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d'])

test(r"""sre.split(r"(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
test(r"""sre.split(r"(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000197
if verbose:
    print "Running tests on sre.findall"

# no group or one group: a list of strings is expected;
# several groups: a list of tuples, with '' for groups that did not match
test(r"""sre.findall(r":+", "abc")""", [])
test(r"""sre.findall(r":+", "a:b::c:::d")""", [":", "::", ":::"])
test(r"""sre.findall(r"(:+)", "a:b::c:::d")""", [":", "::", ":::"])
test(r"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
     [(":", ""), (":", ":"), (":", "::")])
test(r"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])

# bug 117612
test(r"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
210
Fredrik Lundhb7747e22001-10-28 20:15:40 +0000211if sys.hexversion >= 0x02020000:
212 if verbose:
213 print "Running tests on sre.finditer"
214 def fixup(seq):
215 # convert iterator to list
216 if not hasattr(seq, "next") or not hasattr(seq, "__iter__"):
217 print "finditer returned", type(seq)
218 return map(lambda item: item.group(0), seq)
219 # sanity
220 test(r"""fixup(sre.finditer(r":+", "a:b::c:::d"))""", [":", "::", ":::"])
221
if verbose:
    print "Running tests on sre.match"

# groups() and group() with zero, one and several arguments
test(r"""sre.match(r'a', 'a').groups()""", ())
test(r"""sre.match(r'(a)', 'a').groups()""", ('a',))
test(r"""sre.match(r'(a)', 'a').group(0)""", 'a')
test(r"""sre.match(r'(a)', 'a').group(1)""", 'a')
test(r"""sre.match(r'(a)', 'a').group(1, 1)""", ('a', 'a'))

# groups that did not take part in the match are reported as None, or
# as the value passed to groups(); pat is read by name from the
# expressions below
pat = sre.compile(r'((a)|(b))(c)?')
test(r"""pat.match('a').groups()""", ('a', 'a', None, None))
test(r"""pat.match('b').groups()""", ('b', None, 'b', None))
test(r"""pat.match('ac').groups()""", ('a', 'a', None, 'c'))
test(r"""pat.match('bc').groups()""", ('b', None, 'b', 'c'))
test(r"""pat.match('bc').groups("")""", ('b', "", 'b', 'c'))

# named groups can be fetched by number, by name, or a mix of both
pat = sre.compile(r'(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
test(r"""pat.match('a').group(1, 2, 3)""", ('a', None, None))
test(r"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None))
test(r"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c'))

# bug 448951 (similar to 429357, but with single char match)
# (Also test greedy matches.)
# op is substituted into the expression text before test() sees it
for op in '','?','*':
    test(r"""sre.match(r'((.%s):)?z', 'z').groups()"""%op, (None, None))
    test(r"""sre.match(r'((.%s):)?z', 'a:z').groups()"""%op, ('a:', 'a'))
248
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000249if verbose:
250 print "Running tests on sre.escape"
251
Fredrik Lundh143328b2000-09-02 11:03:34 +0000252p = ""
253for i in range(0, 256):
254 p = p + chr(i)
Fredrik Lundh538f05c2001-01-14 15:15:37 +0000255 test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
Fredrik Lundh143328b2000-09-02 11:03:34 +0000256 test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000257
Fredrik Lundh143328b2000-09-02 11:03:34 +0000258pat = sre.compile(sre.escape(p))
Fredrik Lundh538f05c2001-01-14 15:15:37 +0000259test(r"""pat.match(p) is not None""", 1)
Fredrik Lundh143328b2000-09-02 11:03:34 +0000260test(r"""pat.match(p).span()""", (0,256))
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000261
# announce the sre.Scanner section when running verbosely
if verbose:
    print 'Running tests on sre.Scanner'
264
# Token callbacks taking (scanner, token); the scanner argument is unused.

def s_ident(scanner, token):
    """Return the identifier text unchanged."""
    return token

def s_operator(scanner, token):
    """Return the operator text prefixed with "op"."""
    return "op%s" % token

def s_float(scanner, token):
    """Return the token converted with float()."""
    return float(token)

def s_int(scanner, token):
    """Return the token converted with int()."""
    return int(token)
269
# (pattern, callback) pairs; the whitespace entry has None in place of a
# callback, and the expected result below contains no whitespace tokens
scanner = sre.Scanner([
    (r"[a-zA-Z_]\w*", s_ident),
    (r"\d+\.\d*", s_float),
    (r"\d+", s_int),
    (r"=|\+|-|\*|/", s_operator),
    (r"\s+", None),
    ])

# sanity check
# (expected value: the list of callback results and the unscanned rest)
test('scanner.scan("sum = 3*foo + 312.50 + bar")',
     (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], ''))
281
282if verbose:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000283 print 'Pickling a SRE_Pattern instance'
284
285try:
286 import pickle
Fredrik Lundh03dd0102000-09-03 10:43:16 +0000287 pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000288 s = pickle.dumps(pat)
289 pat = pickle.loads(s)
290except:
Guido van Rossumbaefceb2001-12-08 05:11:15 +0000291 print TestFailed, 're module pickle'
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000292
293try:
294 import cPickle
Fredrik Lundh03dd0102000-09-03 10:43:16 +0000295 pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000296 s = cPickle.dumps(pat)
297 pat = cPickle.loads(s)
298except:
Guido van Rossumbaefceb2001-12-08 05:11:15 +0000299 print TestFailed, 're module cPickle'
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000300
# constants
# (each one-letter flag has to equal its long-named counterpart)
test(r"""sre.I""", sre.IGNORECASE)
test(r"""sre.L""", sre.LOCALE)
test(r"""sre.M""", sre.MULTILINE)
test(r"""sre.S""", sre.DOTALL)
test(r"""sre.X""", sre.VERBOSE)
test(r"""sre.T""", sre.TEMPLATE)
test(r"""sre.U""", sre.UNICODE)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000309
310for flags in [sre.I, sre.M, sre.X, sre.S, sre.L, sre.T, sre.U]:
311 try:
312 r = sre.compile('^pattern$', flags)
313 except:
314 print 'Exception raised on flag', flags
315
if verbose:
    print 'Test engine limitations'

# Try nasty case that overflows the straightforward recursive
# implementation of repeated groups.
# Because an exception class is passed, test() reports a failure if the
# call returns normally; the span given as the result is not compared.
test("sre.match('(x)*', 50000*'x').span()", (0, 50000), RuntimeError)
test("sre.match(r'(x)*y', 50000*'x'+'y').span()", (0, 50001), RuntimeError)
test("sre.match(r'(x)*?y', 50000*'x'+'y').span()", (0, 50001), RuntimeError)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000324
Barry Warsaw408b6d32002-07-30 23:27:12 +0000325from test.re_tests import *
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000326
327if verbose:
328 print 'Running re_tests test suite'
329else:
330 # To save time, only run the first and last 10 tests
331 #tests = tests[:10] + tests[-10:]
Fredrik Lundh6f013982000-07-03 18:44:21 +0000332 pass
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000333
334for t in tests:
335 sys.stdout.flush()
336 pattern=s=outcome=repl=expected=None
337 if len(t)==5:
338 pattern, s, outcome, repl, expected = t
339 elif len(t)==3:
Fredrik Lundh6f013982000-07-03 18:44:21 +0000340 pattern, s, outcome = t
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000341 else:
342 raise ValueError, ('Test tuples should have 3 or 5 fields',t)
343
344 try:
345 obj=sre.compile(pattern)
346 except sre.error:
347 if outcome==SYNTAX_ERROR: pass # Expected a syntax error
Fredrik Lundh6f013982000-07-03 18:44:21 +0000348 else:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000349 print '=== Syntax error:', t
350 except KeyboardInterrupt: raise KeyboardInterrupt
351 except:
352 print '*** Unexpected error ***', t
353 if verbose:
354 traceback.print_exc(file=sys.stdout)
355 else:
356 try:
357 result=obj.search(s)
358 except (sre.error), msg:
359 print '=== Unexpected exception', t, repr(msg)
360 if outcome==SYNTAX_ERROR:
Fredrik Lundh03dd0102000-09-03 10:43:16 +0000361 print '=== Compiled incorrectly', t
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000362 elif outcome==FAIL:
363 if result is None: pass # No match, as expected
364 else: print '=== Succeeded incorrectly', t
365 elif outcome==SUCCEED:
366 if result is not None:
367 # Matched, as expected, so now we compute the
368 # result string and compare it to our expected result.
369 start, end = result.span(0)
370 vardict={'found': result.group(0),
371 'groups': result.group(),
372 'flags': result.re.flags}
373 for i in range(1, 100):
374 try:
375 gi = result.group(i)
376 # Special hack because else the string concat fails:
377 if gi is None:
378 gi = "None"
379 except IndexError:
380 gi = "Error"
381 vardict['g%d' % i] = gi
382 for i in result.re.groupindex.keys():
383 try:
384 gi = result.group(i)
385 if gi is None:
386 gi = "None"
387 except IndexError:
388 gi = "Error"
389 vardict[i] = gi
390 repl=eval(repl, vardict)
391 if repl!=expected:
392 print '=== grouping error', t,
393 print repr(repl)+' should be '+repr(expected)
394 else:
395 print '=== Failed incorrectly', t
Fredrik Lundh90a07912000-06-30 07:50:59 +0000396 continue
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000397
398 # Try the match on a unicode string, and check that it
399 # still succeeds.
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000400 try:
401 u = unicode(s, "latin-1")
402 except NameError:
403 pass
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000404 except TypeError:
405 continue # skip unicode test strings
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000406 else:
407 result=obj.search(u)
408 if result==None:
409 print '=== Fails on unicode match', t
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000410
411 # Try the match on a unicode pattern, and check that it
412 # still succeeds.
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000413 try:
414 u = unicode(pattern, "latin-1")
415 except NameError:
416 pass
417 else:
418 obj=sre.compile(u)
419 result=obj.search(s)
420 if result==None:
421 print '=== Fails on unicode pattern match', t
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000422
423 # Try the match with the search area limited to the extent
424 # of the match and see if it still succeeds. \B will
425 # break (because it won't match at the end or start of a
426 # string), so we'll ignore patterns that feature it.
Fredrik Lundh6f013982000-07-03 18:44:21 +0000427
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000428 if pattern[:2]!='\\B' and pattern[-2:]!='\\B':
429 obj=sre.compile(pattern)
Fredrik Lundh90a07912000-06-30 07:50:59 +0000430 result=obj.search(s, result.start(0), result.end(0)+1)
431 if result==None:
432 print '=== Failed on range-limited match', t
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000433
434 # Try the match with IGNORECASE enabled, and check that it
435 # still succeeds.
436 obj=sre.compile(pattern, sre.IGNORECASE)
437 result=obj.search(s)
438 if result==None:
439 print '=== Fails on case-insensitive match', t
440
441 # Try the match with LOCALE enabled, and check that it
442 # still succeeds.
443 obj=sre.compile(pattern, sre.LOCALE)
444 result=obj.search(s)
445 if result==None:
446 print '=== Fails on locale-sensitive match', t
447
Fredrik Lundhc2ed6212000-08-01 13:01:43 +0000448 # Try the match with UNICODE locale enabled, and check
449 # that it still succeeds.
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000450 if have_unicode:
451 obj=sre.compile(pattern, sre.UNICODE)
452 result=obj.search(s)
453 if result==None:
454 print '=== Fails on unicode-sensitive match', t