blob: 8136002f746111f5cb4e87ed2ee7fad6bbc07923 [file] [log] [blame]
Brett Cannon672237d2008-09-09 00:49:16 +00001from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00003from re import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00004import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00005from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00006
Guido van Rossum23b22571997-07-17 22:36:14 +00007# Misc tests from Tim Peters' re.doc
8
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
12
Skip Montanaro8ed06da2003-04-24 19:43:18 +000013import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000014
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000016
17 def test_weakref(self):
18 s = 'QabbbcR'
19 x = re.compile('ab+c')
20 y = proxy(x)
21 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
22
Skip Montanaro8ed06da2003-04-24 19:43:18 +000023 def test_search_star_plus(self):
24 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
25 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
26 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
27 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000028 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
30 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
31 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
32 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000033 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000034
def bump_num(self, matchobj):
    # Replacement callback: the matched digits plus one, as a string.
    return str(1 + int(matchobj.group(0)))
Guido van Rossum23b22571997-07-17 22:36:14 +000038
Skip Montanaro8ed06da2003-04-24 19:43:18 +000039 def test_basic_re_sub(self):
40 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
41 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
42 '9.3 -3 24x100y')
43 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
44 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000045
Skip Montanaro8ed06da2003-04-24 19:43:18 +000046 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
47 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 s = r"\1\1"
50 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
51 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
52 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000053
Skip Montanaro8ed06da2003-04-24 19:43:18 +000054 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
55 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
56 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
57 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000058
Skip Montanaro8ed06da2003-04-24 19:43:18 +000059 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
60 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
61 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
62 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
63 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000064
Skip Montanaro8ed06da2003-04-24 19:43:18 +000065 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000066
Skip Montanaro2726fcd2003-04-25 14:31:54 +000067 def test_bug_449964(self):
68 # fails for group followed by other escape
69 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
70 'xx\bxx\b')
71
72 def test_bug_449000(self):
73 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000074 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
75 'abc\ndef\n')
76 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n')
78 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000082
Guido van Rossum1ff91d92007-09-10 22:02:25 +000083 def test_bug_1140(self):
84 # re.sub(x, y, u'') should return u'', not '', and
85 # re.sub(x, y, '') should return '', not u''.
86 # Also:
87 # re.sub(x, y, unicode(x)) should return unicode(y), and
88 # re.sub(x, y, str(x)) should return
89 # str(y) if isinstance(y, str) else unicode(y).
90 for x in 'x', u'x':
91 for y in 'y', u'y':
92 z = re.sub(x, y, u'')
93 self.assertEqual(z, u'')
94 self.assertEqual(type(z), unicode)
95 #
96 z = re.sub(x, y, '')
97 self.assertEqual(z, '')
98 self.assertEqual(type(z), str)
99 #
100 z = re.sub(x, y, unicode(x))
101 self.assertEqual(z, y)
102 self.assertEqual(type(z), unicode)
103 #
104 z = re.sub(x, y, str(x))
105 self.assertEqual(z, y)
106 self.assertEqual(type(z), type(y))
107
Raymond Hettinger80016c92007-12-19 18:13:31 +0000108 def test_bug_1661(self):
109 # Verify that flags do not get silently ignored with compiled patterns
110 pattern = re.compile('.')
111 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
112 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
113 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
114 self.assertRaises(ValueError, re.compile, pattern, re.I)
115
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000116 def test_bug_3629(self):
117 # A regex that triggered a bug in the sre-code validator
118 re.compile("(?P<quote>)(?(quote))")
119
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000120 def test_sub_template_numeric_escape(self):
121 # bug 776311 and friends
122 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
123 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
124 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
125 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
126 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
127 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
128 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
129
130 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
131 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
132
133 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
134 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
135 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
136 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
137 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
138
139 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
140 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000141
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000142 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
143 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
144 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
145 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
151 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
153 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
154
155 # in python2.3 (etc), these loop endlessly in sre_parser.py
156 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
157 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
158 'xz8')
159 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
160 'xza')
161
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000162 def test_qualified_re_sub(self):
163 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
164 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000165
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000166 def test_bug_114660(self):
167 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
168 'hello there')
169
170 def test_bug_462270(self):
171 # Test for empty sub() behaviour, see SF bug #462270
172 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
173 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
174
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000175 def test_symbolic_refs(self):
176 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
177 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
178 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
179 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
180 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
181 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
182 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
183 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000184 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000185
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000186 def test_re_subn(self):
187 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
188 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
189 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
190 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
191 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000192
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000193 def test_re_split(self):
194 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
195 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
196 self.assertEqual(re.split("(:*)", ":a:b::c"),
197 ['', ':', 'a', ':', 'b', '::', 'c'])
198 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
199 self.assertEqual(re.split("(:)*", ":a:b::c"),
200 ['', ':', 'a', ':', 'b', ':', 'c'])
201 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
202 ['', ':', 'a', ':b::', 'c'])
203 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
204 ['', None, ':', 'a', None, ':', '', 'b', None, '',
205 None, '::', 'c'])
206 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
207 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000208
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000209 def test_qualified_re_split(self):
210 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
211 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
212 self.assertEqual(re.split("(:)", ":a:b::c", 2),
213 ['', ':', 'a', ':', 'b::c'])
214 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
215 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000216
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000217 def test_re_findall(self):
218 self.assertEqual(re.findall(":+", "abc"), [])
219 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
220 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
221 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
222 (":", ":"),
223 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000224
Skip Montanaro5ba00542003-04-25 16:00:14 +0000225 def test_bug_117612(self):
226 self.assertEqual(re.findall(r"(a|(b))", "aba"),
227 [("a", ""),("b", "b"),("a", "")])
228
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000229 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000230 self.assertEqual(re.match('a', 'a').groups(), ())
231 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
232 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
233 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
234 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000235
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000236 pat = re.compile('((a)|(b))(c)?')
237 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
238 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
239 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
240 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
241 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000242
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000243 # A single group
244 m = re.match('(a)', 'a')
245 self.assertEqual(m.group(0), 'a')
246 self.assertEqual(m.group(0), 'a')
247 self.assertEqual(m.group(1), 'a')
248 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000249
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000250 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
251 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
252 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
253 (None, 'b', None))
254 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000255
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000256 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000257 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
258 ('(', 'a'))
259 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
260 (None, 'a'))
261 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
262 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
263 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
264 ('a', 'b'))
265 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
266 (None, 'd'))
267 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
268 (None, 'd'))
269 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
270 ('a', ''))
271
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000272 # Tests for bug #1177831: exercise groups other than the first group
273 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
274 self.assertEqual(p.match('abc').groups(),
275 ('a', 'b', 'c'))
276 self.assertEqual(p.match('ad').groups(),
277 ('a', None, 'd'))
278 self.assertEqual(p.match('abd'), None)
279 self.assertEqual(p.match('ac'), None)
280
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000281
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000282 def test_re_groupref(self):
283 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
284 ('|', 'a'))
285 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
286 (None, 'a'))
287 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
288 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
289 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
290 ('a', 'a'))
291 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
292 (None, None))
293
294 def test_groupdict(self):
295 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
296 'first second').groupdict(),
297 {'first':'first', 'second':'second'})
298
299 def test_expand(self):
300 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
301 "first second")
302 .expand(r"\2 \1 \g<second> \g<first>"),
303 "second first second first")
304
305 def test_repeat_minmax(self):
306 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
307 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
308 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
309 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
310
311 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
312 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
313 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
314 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
315 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
316 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
317 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
318 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
319
320 self.assertEqual(re.match("^x{1}$", "xxx"), None)
321 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
322 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
323 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
324
325 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
326 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
327 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
328 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
329 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
330 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
331 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
332 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
333
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000334 self.assertEqual(re.match("^x{}$", "xxx"), None)
335 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
336
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000337 def test_getattr(self):
338 self.assertEqual(re.match("(a)", "a").pos, 0)
339 self.assertEqual(re.match("(a)", "a").endpos, 1)
340 self.assertEqual(re.match("(a)", "a").string, "a")
341 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
342 self.assertNotEqual(re.match("(a)", "a").re, None)
343
344 def test_special_escapes(self):
345 self.assertEqual(re.search(r"\b(b.)\b",
346 "abcd abc bcd bx").group(1), "bx")
347 self.assertEqual(re.search(r"\B(b.)\B",
348 "abc bcd bc abxd").group(1), "bx")
349 self.assertEqual(re.search(r"\b(b.)\b",
350 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
351 self.assertEqual(re.search(r"\B(b.)\B",
352 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
353 self.assertEqual(re.search(r"\b(b.)\b",
354 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
355 self.assertEqual(re.search(r"\B(b.)\B",
356 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
357 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
358 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
359 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
360 self.assertEqual(re.search(r"\b(b.)\b",
361 u"abcd abc bcd bx").group(1), "bx")
362 self.assertEqual(re.search(r"\B(b.)\B",
363 u"abc bcd bc abxd").group(1), "bx")
364 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
365 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
366 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
367 self.assertEqual(re.search(r"\d\D\w\W\s\S",
368 "1aa! a").group(0), "1aa! a")
369 self.assertEqual(re.search(r"\d\D\w\W\s\S",
370 "1aa! a", re.LOCALE).group(0), "1aa! a")
371 self.assertEqual(re.search(r"\d\D\w\W\s\S",
372 "1aa! a", re.UNICODE).group(0), "1aa! a")
373
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000374 def test_bigcharset(self):
375 self.assertEqual(re.match(u"([\u2222\u2223])",
376 u"\u2222").group(1), u"\u2222")
377 self.assertEqual(re.match(u"([\u2222\u2223])",
378 u"\u2222", re.UNICODE).group(1), u"\u2222")
379
380 def test_anyall(self):
381 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
382 "a\nb")
383 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
384 "a\n\nb")
385
386 def test_non_consuming(self):
387 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
388 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
389 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
390 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
391 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
392 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
393 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
394
395 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
396 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
397 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
398 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
399
400 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000401 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
402 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000403 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
404 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
405 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
406 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
407 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
408 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
409 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
410 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
411
412 def test_category(self):
413 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
414
415 def test_getlower(self):
416 import _sre
417 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
418 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
419 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
420
421 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
422 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
423
424 def test_not_literal(self):
425 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
426 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
427
428 def test_search_coverage(self):
429 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
430 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
431
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000432 def test_re_escape(self):
433 p=""
434 for i in range(0, 256):
435 p = p + chr(i)
436 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
437 True)
438 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000439
Skip Montanaro1e703c62003-04-25 15:40:28 +0000440 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000441 self.assertEqual(pat.match(p) is not None, True)
442 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000443
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000444 def test_pickling(self):
445 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000446 self.pickle_test(pickle)
447 import cPickle
448 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000449 # old pickles expect the _compile() reconstructor in sre module
450 import warnings
Brett Cannon672237d2008-09-09 00:49:16 +0000451 with warnings.catch_warnings():
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000452 warnings.filterwarnings("ignore", "The sre module is deprecated",
453 DeprecationWarning)
454 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000455
def pickle_test(self, pickle):
    # Dump and reload a compiled pattern with the given pickle module;
    # the reloaded pattern must compare equal to the original.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    newpat = pickle.loads(pickle.dumps(oldpat))
    self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000461
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000462 def test_constants(self):
463 self.assertEqual(re.I, re.IGNORECASE)
464 self.assertEqual(re.L, re.LOCALE)
465 self.assertEqual(re.M, re.MULTILINE)
466 self.assertEqual(re.S, re.DOTALL)
467 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000468
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000469 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000470 for flag in [re.I, re.M, re.X, re.S, re.L]:
471 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000472
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000473 def test_sre_character_literals(self):
474 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
475 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
476 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
477 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
478 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
479 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
480 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
481 self.assertRaises(re.error, re.match, "\911", "")
482
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000483 def test_sre_character_class_literals(self):
484 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
485 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
486 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
487 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
488 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
489 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
490 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
491 self.assertRaises(re.error, re.match, "[\911]", "")
492
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000493 def test_bug_113254(self):
494 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
495 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
496 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
497
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000498 def test_bug_527371(self):
499 # bug described in patches 527371/672491
500 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
501 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
502 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
503 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
504 self.assertEqual(re.match("((a))", "a").lastindex, 1)
505
506 def test_bug_545855(self):
507 # bug 545855 -- This pattern failed to cause a compile error as it
508 # should, instead provoking a TypeError.
509 self.assertRaises(re.error, re.compile, 'foo[a-')
510
511 def test_bug_418626(self):
512 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
513 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
514 # pattern '*?' on a long string.
515 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
516 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
517 20003)
518 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000519 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000520 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000521 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000522
523 def test_bug_612074(self):
524 pat=u"["+re.escape(u"\u2039")+u"]"
525 self.assertEqual(re.compile(pat) and 1, 1)
526
Skip Montanaro1e703c62003-04-25 15:40:28 +0000527 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000528 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000529 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000530 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
531 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
532 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000533
534 def test_scanner(self):
535 def s_ident(scanner, token): return token
536 def s_operator(scanner, token): return "op%s" % token
537 def s_float(scanner, token): return float(token)
538 def s_int(scanner, token): return int(token)
539
540 scanner = Scanner([
541 (r"[a-zA-Z_]\w*", s_ident),
542 (r"\d+\.\d*", s_float),
543 (r"\d+", s_int),
544 (r"=|\+|-|\*|/", s_operator),
545 (r"\s+", None),
546 ])
547
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000548 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
549
Skip Montanaro1e703c62003-04-25 15:40:28 +0000550 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
551 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
552 'op+', 'bar'], ''))
553
Skip Montanaro5ba00542003-04-25 16:00:14 +0000554 def test_bug_448951(self):
555 # bug 448951 (similar to 429357, but with single char match)
556 # (Also test greedy matches.)
557 for op in '','?','*':
558 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
559 (None, None))
560 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
561 ('a:', 'a'))
562
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000563 def test_bug_725106(self):
564 # capturing groups in alternatives in repeats
565 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
566 ('b', 'a'))
567 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
568 ('c', 'b'))
569 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
570 ('b', None))
571 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
572 ('b', None))
573 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
574 ('b', 'a'))
575 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
576 ('c', 'b'))
577 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
578 ('b', None))
579 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
580 ('b', None))
581
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000582 def test_bug_725149(self):
583 # mark_stack_base restoring before restoring marks
584 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
585 ('a', None))
586 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
587 ('a', None, None))
588
Just van Rossum12723ba2003-07-02 20:03:04 +0000589 def test_bug_764548(self):
590 # bug 764548, re.compile() barfs on str/unicode subclasses
591 try:
592 unicode
593 except NameError:
594 return # no problem if we have no unicode
595 class my_unicode(unicode): pass
596 pat = re.compile(my_unicode("abc"))
597 self.assertEqual(pat.match("xyz"), None)
598
Skip Montanaro5ba00542003-04-25 16:00:14 +0000599 def test_finditer(self):
600 iter = re.finditer(r":+", "a:b::c:::d")
601 self.assertEqual([item.group(0) for item in iter],
602 [":", "::", ":::"])
603
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000604 def test_bug_926075(self):
605 try:
606 unicode
607 except NameError:
608 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000609 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000610 re.compile(eval("u'bug_926075'")))
611
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000612 def test_bug_931848(self):
613 try:
614 unicode
615 except NameError:
616 pass
617 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
618 self.assertEqual(re.compile(pattern).split("a.b.c"),
619 ['a','b','c'])
620
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000621 def test_bug_581080(self):
622 iter = re.finditer(r"\s", "a b")
623 self.assertEqual(iter.next().span(), (1,2))
624 self.assertRaises(StopIteration, iter.next)
625
626 scanner = re.compile(r"\s").scanner("a b")
627 self.assertEqual(scanner.search().span(), (1, 2))
628 self.assertEqual(scanner.search(), None)
629
630 def test_bug_817234(self):
631 iter = re.finditer(r".*", "asdf")
632 self.assertEqual(iter.next().span(), (0, 4))
633 self.assertEqual(iter.next().span(), (4, 4))
634 self.assertRaises(StopIteration, iter.next)
635
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000636 def test_bug_6561(self):
637 # '\d' should match characters in Unicode category 'Nd'
638 # (Number, Decimal Digit), but not those in 'Nl' (Number,
639 # Letter) or 'No' (Number, Other).
640 decimal_digits = [
641 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
642 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
643 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
644 ]
645 for x in decimal_digits:
646 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
647
648 not_decimal_digits = [
649 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
650 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
651 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
652 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
653 ]
654 for x in not_decimal_digits:
655 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
656
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000657 def test_empty_array(self):
658 # SF buf 1647541
659 import array
660 for typecode in 'cbBuhHiIlLfd':
661 a = array.array(typecode)
662 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000663 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000664
Guido van Rossumae04c332008-01-03 19:12:44 +0000665 def test_inline_flags(self):
666 # Bug #1700
667 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
668 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
669
670 p = re.compile(upper_char, re.I | re.U)
671 q = p.match(lower_char)
672 self.assertNotEqual(q, None)
673
674 p = re.compile(lower_char, re.I | re.U)
675 q = p.match(upper_char)
676 self.assertNotEqual(q, None)
677
678 p = re.compile('(?i)' + upper_char, re.U)
679 q = p.match(lower_char)
680 self.assertNotEqual(q, None)
681
682 p = re.compile('(?i)' + lower_char, re.U)
683 q = p.match(upper_char)
684 self.assertNotEqual(q, None)
685
686 p = re.compile('(?iu)' + upper_char)
687 q = p.match(lower_char)
688 self.assertNotEqual(q, None)
689
690 p = re.compile('(?iu)' + lower_char)
691 q = p.match(upper_char)
692 self.assertNotEqual(q, None)
693
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000694 def test_dollar_matches_twice(self):
695 "$ matches the end of string, and just before the terminating \n"
696 pattern = re.compile('$')
697 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
698 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
699 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
700
701 pattern = re.compile('$', re.MULTILINE)
702 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
703 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
704 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
705
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000706 def test_dealloc(self):
707 # issue 3299: check for segfault in debug build
708 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000709 # the overflow limit is different on wide and narrow builds and it
710 # depends on the definition of SRE_CODE (see sre.h).
711 # 2**128 should be big enough to overflow on both. For smaller values
712 # a RuntimeError is raised instead of OverflowError.
713 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000714 self.assertRaises(TypeError, re.finditer, "a", {})
715 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000716
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000717def run_re_tests():
718 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
719 if verbose:
720 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000721 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000722 # To save time, only run the first and last 10 tests
723 #tests = tests[:10] + tests[-10:]
724 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000725
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000726 for t in tests:
727 sys.stdout.flush()
728 pattern = s = outcome = repl = expected = None
729 if len(t) == 5:
730 pattern, s, outcome, repl, expected = t
731 elif len(t) == 3:
732 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000733 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000734 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
735
Guido van Rossum41360a41998-03-26 19:42:58 +0000736 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000737 obj = re.compile(pattern)
738 except re.error:
739 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000740 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000741 print '=== Syntax error:', t
742 except KeyboardInterrupt: raise KeyboardInterrupt
743 except:
744 print '*** Unexpected error ***', t
745 if verbose:
746 traceback.print_exc(file=sys.stdout)
747 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000748 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000749 result = obj.search(s)
750 except re.error, msg:
751 print '=== Unexpected exception', t, repr(msg)
752 if outcome == SYNTAX_ERROR:
753 # This should have been a syntax error; forget it.
754 pass
755 elif outcome == FAIL:
756 if result is None: pass # No match, as expected
757 else: print '=== Succeeded incorrectly', t
758 elif outcome == SUCCEED:
759 if result is not None:
760 # Matched, as expected, so now we compute the
761 # result string and compare it to our expected result.
762 start, end = result.span(0)
763 vardict={'found': result.group(0),
764 'groups': result.group(),
765 'flags': result.re.flags}
766 for i in range(1, 100):
767 try:
768 gi = result.group(i)
769 # Special hack because else the string concat fails:
770 if gi is None:
771 gi = "None"
772 except IndexError:
773 gi = "Error"
774 vardict['g%d' % i] = gi
775 for i in result.re.groupindex.keys():
776 try:
777 gi = result.group(i)
778 if gi is None:
779 gi = "None"
780 except IndexError:
781 gi = "Error"
782 vardict[i] = gi
783 repl = eval(repl, vardict)
784 if repl != expected:
785 print '=== grouping error', t,
786 print repr(repl) + ' should be ' + repr(expected)
787 else:
788 print '=== Failed incorrectly', t
789
790 # Try the match on a unicode string, and check that it
791 # still succeeds.
792 try:
793 result = obj.search(unicode(s, "latin-1"))
794 if result is None:
795 print '=== Fails on unicode match', t
796 except NameError:
797 continue # 1.5.2
798 except TypeError:
799 continue # unicode test case
800
801 # Try the match on a unicode pattern, and check that it
802 # still succeeds.
803 obj=re.compile(unicode(pattern, "latin-1"))
804 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000805 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000806 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000807
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000808 # Try the match with the search area limited to the extent
809 # of the match and see if it still succeeds. \B will
810 # break (because it won't match at the end or start of a
811 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000812
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000813 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
814 and result is not None:
815 obj = re.compile(pattern)
816 result = obj.search(s, result.start(0), result.end(0) + 1)
817 if result is None:
818 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000819
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000820 # Try the match with IGNORECASE enabled, and check that it
821 # still succeeds.
822 obj = re.compile(pattern, re.IGNORECASE)
823 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000824 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000825 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000826
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000827 # Try the match with LOCALE enabled, and check that it
828 # still succeeds.
829 obj = re.compile(pattern, re.LOCALE)
830 result = obj.search(s)
831 if result is None:
832 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000833
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000834 # Try the match with UNICODE locale enabled, and check
835 # that it still succeeds.
836 obj = re.compile(pattern, re.UNICODE)
837 result = obj.search(s)
838 if result is None:
839 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000840
def test_main():
    """Run the ReTests unit tests, then the table-driven run_re_tests() checks."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000844
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()