blob: 3d8926fcdc4a577d7fc5a3497152aa0fc412360a [file] [log] [blame]
# SRE test harness for the Python regression suite

# This is based on test_re.py, but reports failures through a test()
# function instead of all those asserts.
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00005
6import sys
7sys.path=['.']+sys.path
8
9from test_support import verbose, TestFailed
10import sre
11import sys, os, string, traceback
12
#
# test support
15
16def test(expression, result, exception=None):
17 try:
18 r = eval(expression)
19 except:
20 if exception:
21 if not isinstance(sys.exc_value, exception):
22 print expression, "FAILED"
23 # display name, not actual value
24 if exception is sre.error:
25 print "expected", "sre.error"
26 else:
27 print "expected", exception.__name__
28 print "got", sys.exc_type.__name__, str(sys.exc_value)
29 else:
30 print expression, "FAILED"
31 traceback.print_exc(file=sys.stdout)
32 else:
33 if exception:
34 print expression, "FAILED"
35 if exception is sre.error:
36 print "expected", "sre.error"
37 else:
38 print "expected", exception.__name__
39 print "got result", repr(r)
40 else:
41 if r != result:
42 print expression, "FAILED"
43 print "expected", repr(result)
44 print "got result", repr(r)
45
if verbose:
    print('Running tests on character literals')

# Octal and hex escape checks.  test() evaluates each expression with the
# module-level loop variable ``i`` holding the character code under test.
escape_templates = (
    r"""sre.match("\%03o" % i, chr(i)) != None""",
    r"""sre.match("\%03o0" % i, chr(i)+"0") != None""",
    r"""sre.match("\%03o8" % i, chr(i)+"8") != None""",
    r"""sre.match("\x%02x" % i, chr(i)) != None""",
    r"""sre.match("\x%02x0" % i, chr(i)+"0") != None""",
    r"""sre.match("\x%02xz" % i, chr(i)+"z") != None""",
)
for i in range(0, 256):
    for template in escape_templates:
        test(template, 1)

# This pattern is expected to raise sre.error.
test(r"""sre.match("\911", "")""", None, sre.error)
57
#
# Misc tests from Tim Peters' re.doc
60
if verbose:
    print('Running tests on sre.search and sre.match')

# (expression, expected value) pairs, run in order through test().
for expr, want in (
    (r"""sre.search('x*', 'axx').span(0)""", (0, 0)),
    (r"""sre.search('x*', 'axx').span()""", (0, 0)),
    (r"""sre.search('x+', 'axx').span(0)""", (1, 3)),
    (r"""sre.search('x+', 'axx').span()""", (1, 3)),
    (r"""sre.search('x', 'aaa')""", None),
    (r"""sre.match('a*', 'xxx').span(0)""", (0, 0)),
    (r"""sre.match('a*', 'xxx').span()""", (0, 0)),
    (r"""sre.match('x*', 'xxxa').span(0)""", (0, 3)),
    (r"""sre.match('x*', 'xxxa').span()""", (0, 3)),
    (r"""sre.match('a+', 'xxx')""", None),
):
    test(expr, want)

if verbose:
    print('Running tests on sre.sub')

test(r"""sre.sub("(?i)b+", "x", "bbbb BBBB")""", 'x x')
Fredrik Lundh6f013982000-07-03 18:44:21 +000080
def bump_num(matchobj):
    """Return the integer matched by *matchobj*, incremented by one, as a string.

    matchobj -- a match object whose whole match (group 0) is text that
        int() accepts, e.g. a run of decimal digits.

    Intended for use as a replacement callback for sub().
    """
    return str(int(matchobj.group(0)) + 1)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +000084
# Callable and template replacements.
for expr, want in (
    (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y'),
    (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y'),
    (r"""sre.sub('.', lambda m: r"\n", 'x')""", '\\n'),
    (r"""sre.sub('.', r"\n", 'x')""", '\n'),
):
    test(expr, want)

# The expressions below read this module-level name when evaluated.
s = r"\1\1"

for expr, want in (
    (r"""sre.sub('(.)', s, 'x')""", 'xx'),
    (r"""sre.sub('(.)', sre.escape(s), 'x')""", s),
    (r"""sre.sub('(.)', lambda m: s, 'x')""", s),
    (r"""sre.sub('(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx'),
    (r"""sre.sub('(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx'),
    (r"""sre.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx'),
    (r"""sre.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx'),
    (r"""sre.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""",
     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D'),
    (r"""sre.sub('a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a'),
    (r"""sre.sub('a', '\t\n\v\r\f\a', 'a')""",
     chr(9) + chr(10) + chr(11) + chr(13) + chr(12) + chr(7)),
    (r"""sre.sub('^\s*', 'X', 'test')""", 'Xtest'),
    # qualified sub
    (r"""sre.sub('a', 'b', 'aaaaa')""", 'bbbbb'),
    (r"""sre.sub('a', 'b', 'aaaaa', 1)""", 'baaaa'),
):
    test(expr, want)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000111
if verbose:
    print('Running tests on symbolic references')

# (expression, exception class the expression is expected to raise)
for expr, exc in (
    (r"""sre.sub('(?P<a>x)', '\g<a', 'xx')""", sre.error),
    (r"""sre.sub('(?P<a>x)', '\g<', 'xx')""", sre.error),
    (r"""sre.sub('(?P<a>x)', '\g', 'xx')""", sre.error),
    (r"""sre.sub('(?P<a>x)', '\g<a a>', 'xx')""", sre.error),
    (r"""sre.sub('(?P<a>x)', '\g<1a1>', 'xx')""", sre.error),
    (r"""sre.sub('(?P<a>x)', '\g<ab>', 'xx')""", IndexError),
    (r"""sre.sub('(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", sre.error),
    (r"""sre.sub('(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", sre.error),
):
    test(expr, None, exc)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000123
if verbose:
    print('Running tests on sre.subn')

# Each expected value is the (new string, substitution count) pair.
for expr, want in (
    (r"""sre.subn("(?i)b+", "x", "bbbb BBBB")""", ('x x', 2)),
    (r"""sre.subn("b+", "x", "bbbb BBBB")""", ('x BBBB', 1)),
    (r"""sre.subn("b+", "x", "xyz")""", ('xyz', 0)),
    (r"""sre.subn("b*", "x", "xyz")""", ('xxxyxzx', 4)),
    (r"""sre.subn("b*", "x", "xyz", 2)""", ('xxxyz', 2)),
):
    test(expr, want)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000132
if verbose:
    print('Running tests on sre.split')

# (expression, expected list) pairs; the last four pass a third argument
# of 2 to sre.split.
for expr, want in (
    (r"""sre.split(":", ":a:b::c")""", ['', 'a', 'b', '', 'c']),
    (r"""sre.split(":*", ":a:b::c")""", ['', 'a', 'b', 'c']),
    (r"""sre.split("(:*)", ":a:b::c")""",
     ['', ':', 'a', ':', 'b', '::', 'c']),
    (r"""sre.split("(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c']),
    (r"""sre.split("(:)*", ":a:b::c")""",
     ['', ':', 'a', ':', 'b', ':', 'c']),
    (r"""sre.split("([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c']),
    (r"""sre.split("(b)|(:+)", ":a:b::c")""",
     ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c']),
    (r"""sre.split("(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c']),
    (r"""sre.split(":", ":a:b::c", 2)""", ['', 'a', 'b::c']),
    (r"""sre.split(':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d']),
    (r"""sre.split("(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c']),
    (r"""sre.split("(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c']),
):
    test(expr, want)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000151
if verbose:
    print("Running tests on sre.findall")

# (expression, expected list of matches) pairs.
for expr, want in (
    (r"""sre.findall(":+", "abc")""", []),
    (r"""sre.findall(":+", "a:b::c:::d")""", [":", "::", ":::"]),
    (r"""sre.findall("(:+)", "a:b::c:::d")""", [":", "::", ":::"]),
    (r"""sre.findall("(:)(:*)", "a:b::c:::d")""",
     [(":", ""), (":", ":"), (":", "::")]),
    (r"""sre.findall("(a)|(b)", "abc")""", [("a", ""), ("", "b")]),
):
    test(expr, want)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000161
if verbose:
    print("Running tests on sre.match")

for expr, want in (
    (r"""sre.match('a', 'a').groups()""", ()),
    (r"""sre.match('(a)', 'a').groups()""", ('a',)),
    (r"""sre.match('(a)', 'a').group(0)""", 'a'),
    (r"""sre.match('(a)', 'a').group(1)""", 'a'),
    (r"""sre.match('(a)', 'a').group(1, 1)""", ('a', 'a')),
):
    test(expr, want)

# The expressions below read the module-level name ``pat`` when evaluated.
pat = sre.compile('((a)|(b))(c)?')
for expr, want in (
    (r"""pat.match('a').groups()""", ('a', 'a', None, None)),
    (r"""pat.match('b').groups()""", ('b', None, 'b', None)),
    (r"""pat.match('ac').groups()""", ('a', 'a', None, 'c')),
    (r"""pat.match('bc').groups()""", ('b', None, 'b', 'c')),
    (r"""pat.match('bc').groups("")""", ('b', "", 'b', 'c')),
):
    test(expr, want)

# Same idea with named groups, addressed by index, by name, and mixed.
pat = sre.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
for expr, want in (
    (r"""pat.match('a').group(1, 2, 3)""", ('a', None, None)),
    (r"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None)),
    (r"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c')),
):
    test(expr, want)
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000182
if verbose:
    print("Running tests on sre.escape")

# Every single character, once escaped, must match itself.  The
# expressions read the module-level loop variable ``i``.
for i in range(0, 256):
    for expr, want in (
        (r"""sre.match(sre.escape(chr(i)), chr(i)) != None""", 1),
        (r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0, 1)),
    ):
        test(expr, want)

# ... and so must the string made of all 256 characters in order.  The
# expressions read the module-level names ``p`` and ``pat``.
p = "".join(map(chr, range(0, 256)))
pat = sre.compile(sre.escape(p))
test(r"""pat.match(p) != None""", 1)
test(r"""pat.match(p).span()""", (0, 256))
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000195
if verbose:
    print('Pickling a SRE_Pattern instance')

# Round-trip a compiled pattern through pickle, then cPickle.  Any
# exception at all, including a failed import, is reported rather than
# raised; the original author marked both failures as expected.
try:
    import pickle
    pat = sre.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    s = pickle.dumps(pat)
    pat = pickle.loads(s)
except:
    print('%s %s' % (TestFailed, 're module pickle'))  # expected

try:
    import cPickle
    pat = sre.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    s = cPickle.dumps(pat)
    pat = cPickle.loads(s)
except:
    print('%s %s' % (TestFailed, 're module cPickle'))  # expected
214
# constants: each one-letter flag must equal its long-named counterpart.
# The long name is looked up just before its check runs, so a missing
# attribute surfaces at the same point as with one statement per flag.
for expr, long_name in (
    (r"""sre.I""", "IGNORECASE"),
    (r"""sre.L""", "LOCALE"),
    (r"""sre.M""", "MULTILINE"),
    (r"""sre.S""", "DOTALL"),
    (r"""sre.X""", "VERBOSE"),
    (r"""sre.T""", "TEMPLATE"),
    (r"""sre.U""", "UNICODE"),
):
    test(expr, getattr(sre, long_name))

# Compiling a simple pattern must work with every individual flag.
for flags in [sre.I, sre.M, sre.X, sre.S, sre.L, sre.T, sre.U]:
    try:
        r = sre.compile('^pattern$', flags)
    except:
        print('Exception raised on flag %s' % (flags,))
229
if verbose:
    print('Test engine limitations')

# Try nasty case that overflows the straightforward recursive
# implementation of repeated groups.  Each call names RuntimeError as the
# expected exception, so test() reports a normal return as a failure.
for expr, want in (
    (r"""sre.match('(x)*', 50000*'x').span()""", (0, 50000)),
    (r"""sre.match('(x)*y', 50000*'x'+'y').span()""", (0, 50001)),
    (r"""sre.match('(x)*?y', 50000*'x'+'y').span()""", (0, 50001)),
):
    test(expr, want, RuntimeError)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000238
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +0000239from re_tests import *
240
if verbose:
    print('Running re_tests test suite')
# NOTE: a time-saving shortcut for non-verbose runs (keep only
# tests[:10] + tests[-10:]) is currently disabled, so every entry runs.


def _group_text(match, key):
    # Fetch one group for the replacement namespace.  Unmatched groups
    # become the string "None" (otherwise string concatenation in the
    # replacement expression fails) and unknown groups become "Error".
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    if value is None:
        return "None"
    return value


# Drive every entry of ``tests``.  An entry is either
# (pattern, string, outcome) or
# (pattern, string, outcome, replacement expression, expected value).
# Problems are printed, prefixed with "===" or "***"; the loop itself only
# raises for a malformed entry or a keyboard interrupt.
for t in tests:
    sys.stdout.flush()
    pattern = s = outcome = repl = expected = None
    if len(t) == 5:
        pattern, s, outcome, repl, expected = t
    elif len(t) == 3:
        pattern, s, outcome = t
    else:
        raise ValueError('Test tuples should have 3 or 5 fields', t)

    try:
        obj = sre.compile(pattern)
    except sre.error:
        if outcome != SYNTAX_ERROR:
            print('=== Syntax error: %s' % (t,))
        continue  # a syntax error was expected, or has just been reported
    except KeyboardInterrupt:
        raise  # bare raise keeps the original traceback
    except:
        print('*** Unexpected error *** %s' % (t,))
        if verbose:
            traceback.print_exc(file=sys.stdout)
        continue

    try:
        result = obj.search(s)
    except sre.error:
        print('=== Unexpected exception %s %s'
              % (t, repr(sys.exc_info()[1])))
        # There is no match result for this entry; do not fall through
        # and inspect one left over from a previous iteration.
        continue

    if outcome == SYNTAX_ERROR:
        # This should have been a syntax error; forget it.
        continue
    if outcome == FAIL:
        if result is not None:
            print('=== Succeeded incorrectly %s' % (t,))
        continue
    if outcome != SUCCEED:
        continue
    if result is None:
        print('=== Failed incorrectly %s' % (t,))
        continue

    # Matched, as expected, so now we compute the result string and
    # compare it to our expected result.
    start, end = result.span(0)
    vardict = {'found': result.group(0),
               'groups': result.group(),
               'flags': result.re.flags}
    for i in range(1, 100):
        vardict['g%d' % i] = _group_text(result, i)
    for name in result.re.groupindex.keys():
        vardict[name] = _group_text(result, name)
    repl = eval(repl, vardict)
    if repl != expected:
        print('=== grouping error %s %s should be %s'
              % (t, repr(repl), repr(expected)))

    # Try the match on a unicode string, and check that it still
    # succeeds.
    if obj.search(unicode(s, "latin-1")) is None:
        print('=== Fails on unicode match %s' % (t,))

    # Try the match on a unicode pattern, and check that it still
    # succeeds.
    if sre.compile(unicode(pattern, "latin-1")).search(s) is None:
        print('=== Fails on unicode pattern match %s' % (t,))

    # Try the match with the search area limited to the extent of the
    # match and see if it still succeeds.  \B will break (because it
    # won't match at the end or start of a string), so we'll ignore
    # patterns that feature it.  The window comes from the first
    # successful match (start, end), not from the unicode-pattern search
    # above, which may have failed and returned None.
    if pattern[:2] != '\\B' and pattern[-2:] != '\\B':
        if obj.search(s, start, end + 1) is None:
            print('=== Failed on range-limited match %s' % (t,))

    # Try the match with IGNORECASE, LOCALE and UNICODE enabled in turn,
    # and check that it still succeeds.
    for flag, label in ((sre.IGNORECASE, 'case-insensitive'),
                        (sre.LOCALE, 'locale-sensitive'),
                        (sre.UNICODE, 'unicode-sensitive')):
        if sre.compile(pattern, flag).search(s) is None:
            print('=== Fails on %s match %s' % (label, t))
356 print '=== Fails on unicode-sensitive match', t