blob: 28e508c7d889adb1a56fd90f55f937db5b1f3c27 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Guido van Rossumaf554a02007-08-16 23:48:43 +00004from test.test_support import verbose, run_unittest, catch_warning
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00006from re import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): read the whole match as a decimal
    # integer and return that value plus one, as a string.
    return str(1 + int(matchobj.group(0)))
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Thomas Wouters1b7f8912007-09-19 03:06:30 +000086# This test makes no sense until re supports bytes, and should then probably
87# test for the *in*ability to mix bytes and str this way :)
88#
89# def test_bug_1140(self):
90# # re.sub(x, y, b'') should return b'', not '', and
91# # re.sub(x, y, '') should return '', not b''.
92# # Also:
93# # re.sub(x, y, str(x)) should return str(y), and
94# # re.sub(x, y, bytes(x)) should return
95# # str(y) if isinstance(y, str) else unicode(y).
96# for x in 'x', u'x':
97# for y in 'y', u'y':
98# z = re.sub(x, y, u'')
99# self.assertEqual(z, u'')
100# self.assertEqual(type(z), unicode)
101# #
102# z = re.sub(x, y, '')
103# self.assertEqual(z, '')
104# self.assertEqual(type(z), str)
105# #
106# z = re.sub(x, y, unicode(x))
107# self.assertEqual(z, y)
108# self.assertEqual(type(z), unicode)
109# #
110# z = re.sub(x, y, str(x))
111# self.assertEqual(z, y)
112# self.assertEqual(type(z), type(y))
113
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000114 def test_bug_1661(self):
115 # Verify that flags do not get silently ignored with compiled patterns
116 pattern = re.compile('.')
117 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
118 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
119 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
120 self.assertRaises(ValueError, re.compile, pattern, re.I)
121
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000122 def test_sub_template_numeric_escape(self):
123 # bug 776311 and friends
124 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
125 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
126 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
127 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
128 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
129 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
130 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
131
132 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
133 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
134
135 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
136 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
137 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
138 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
139 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
140
141 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
142 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000143
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000144 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
145 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
153 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
155 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
156
157 # in python2.3 (etc), these loop endlessly in sre_parser.py
158 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
159 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
160 'xz8')
161 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
162 'xza')
163
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000164 def test_qualified_re_sub(self):
165 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
166 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000167
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000168 def test_bug_114660(self):
169 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
170 'hello there')
171
172 def test_bug_462270(self):
173 # Test for empty sub() behaviour, see SF bug #462270
174 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
175 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
176
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000177 def test_symbolic_refs(self):
178 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
179 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
180 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
181 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
182 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
183 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
184 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
185 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000186 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000187
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000188 def test_re_subn(self):
189 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
190 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
191 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
192 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
193 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000194
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 def test_re_split(self):
196 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
197 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
198 self.assertEqual(re.split("(:*)", ":a:b::c"),
199 ['', ':', 'a', ':', 'b', '::', 'c'])
200 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
201 self.assertEqual(re.split("(:)*", ":a:b::c"),
202 ['', ':', 'a', ':', 'b', ':', 'c'])
203 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
204 ['', ':', 'a', ':b::', 'c'])
205 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
206 ['', None, ':', 'a', None, ':', '', 'b', None, '',
207 None, '::', 'c'])
208 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
209 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000210
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000211 def test_qualified_re_split(self):
212 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
213 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
214 self.assertEqual(re.split("(:)", ":a:b::c", 2),
215 ['', ':', 'a', ':', 'b::c'])
216 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
217 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000218
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000219 def test_re_findall(self):
220 self.assertEqual(re.findall(":+", "abc"), [])
221 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
222 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
223 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
224 (":", ":"),
225 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000226
Skip Montanaro5ba00542003-04-25 16:00:14 +0000227 def test_bug_117612(self):
228 self.assertEqual(re.findall(r"(a|(b))", "aba"),
229 [("a", ""),("b", "b"),("a", "")])
230
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000231 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000232 self.assertEqual(re.match('a', 'a').groups(), ())
233 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
234 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
235 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
236 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000237
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000238 pat = re.compile('((a)|(b))(c)?')
239 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
240 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
241 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
242 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
243 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000244
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000245 # A single group
246 m = re.match('(a)', 'a')
247 self.assertEqual(m.group(0), 'a')
248 self.assertEqual(m.group(0), 'a')
249 self.assertEqual(m.group(1), 'a')
250 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000251
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000252 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
253 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
254 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
255 (None, 'b', None))
256 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000257
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000258 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000259 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
260 ('(', 'a'))
261 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
262 (None, 'a'))
263 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
264 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
265 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
266 ('a', 'b'))
267 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
268 (None, 'd'))
269 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
270 (None, 'd'))
271 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
272 ('a', ''))
273
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000274 # Tests for bug #1177831: exercise groups other than the first group
275 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
276 self.assertEqual(p.match('abc').groups(),
277 ('a', 'b', 'c'))
278 self.assertEqual(p.match('ad').groups(),
279 ('a', None, 'd'))
280 self.assertEqual(p.match('abd'), None)
281 self.assertEqual(p.match('ac'), None)
282
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000283
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000284 def test_re_groupref(self):
285 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
286 ('|', 'a'))
287 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
288 (None, 'a'))
289 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
290 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
291 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
292 ('a', 'a'))
293 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
294 (None, None))
295
296 def test_groupdict(self):
297 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
298 'first second').groupdict(),
299 {'first':'first', 'second':'second'})
300
301 def test_expand(self):
302 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
303 "first second")
304 .expand(r"\2 \1 \g<second> \g<first>"),
305 "second first second first")
306
307 def test_repeat_minmax(self):
308 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
309 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
310 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
311 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
312
313 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
314 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
315 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
316 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
317 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
318 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
319 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
320 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
321
322 self.assertEqual(re.match("^x{1}$", "xxx"), None)
323 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
324 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
325 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
326
327 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
328 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
329 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
330 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
331 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
332 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
333 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
334 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
335
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000336 self.assertEqual(re.match("^x{}$", "xxx"), None)
337 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
338
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000339 def test_getattr(self):
340 self.assertEqual(re.match("(a)", "a").pos, 0)
341 self.assertEqual(re.match("(a)", "a").endpos, 1)
342 self.assertEqual(re.match("(a)", "a").string, "a")
343 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
344 self.assertNotEqual(re.match("(a)", "a").re, None)
345
346 def test_special_escapes(self):
347 self.assertEqual(re.search(r"\b(b.)\b",
348 "abcd abc bcd bx").group(1), "bx")
349 self.assertEqual(re.search(r"\B(b.)\B",
350 "abc bcd bc abxd").group(1), "bx")
351 self.assertEqual(re.search(r"\b(b.)\b",
352 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
353 self.assertEqual(re.search(r"\B(b.)\B",
354 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
355 self.assertEqual(re.search(r"\b(b.)\b",
356 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
357 self.assertEqual(re.search(r"\B(b.)\B",
358 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
359 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
360 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
361 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
362 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000363 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000364 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000365 "abc bcd bc abxd").group(1), "bx")
366 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
367 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
368 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000369 self.assertEqual(re.search(r"\d\D\w\W\s\S",
370 "1aa! a").group(0), "1aa! a")
371 self.assertEqual(re.search(r"\d\D\w\W\s\S",
372 "1aa! a", re.LOCALE).group(0), "1aa! a")
373 self.assertEqual(re.search(r"\d\D\w\W\s\S",
374 "1aa! a", re.UNICODE).group(0), "1aa! a")
375
376 def test_ignore_case(self):
377 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000378 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000379
380 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000381 self.assertEqual(re.match("([\u2222\u2223])",
382 "\u2222").group(1), "\u2222")
383 self.assertEqual(re.match("([\u2222\u2223])",
384 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000385
386 def test_anyall(self):
387 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
388 "a\nb")
389 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
390 "a\n\nb")
391
392 def test_non_consuming(self):
393 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
394 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
395 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
396 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
397 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
398 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
399 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
400
401 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
402 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
403 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
404 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
405
406 def test_ignore_case(self):
407 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
408 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
409 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
410 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
411 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
412 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
413 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
414 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
415
416 def test_category(self):
417 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
418
419 def test_getlower(self):
420 import _sre
421 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
422 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
423 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
424
425 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000426 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000427
428 def test_not_literal(self):
429 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
430 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
431
432 def test_search_coverage(self):
433 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
434 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
435
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000436 def test_re_escape(self):
437 p=""
438 for i in range(0, 256):
439 p = p + chr(i)
440 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
441 True)
442 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000443
Skip Montanaro1e703c62003-04-25 15:40:28 +0000444 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000445 self.assertEqual(pat.match(p) is not None, True)
446 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000447
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000448 def test_pickling(self):
449 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000450 self.pickle_test(pickle)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000451 # old pickles expect the _compile() reconstructor in sre module
452 import warnings
Guido van Rossumaf554a02007-08-16 23:48:43 +0000453 with catch_warning():
Guido van Rossumd8faa362007-04-27 19:54:29 +0000454 warnings.filterwarnings("ignore", "The sre module is deprecated",
455 DeprecationWarning)
456 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000457
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle module and
    # require the restored object to compare equal to the original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000463
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000464 def test_constants(self):
465 self.assertEqual(re.I, re.IGNORECASE)
466 self.assertEqual(re.L, re.LOCALE)
467 self.assertEqual(re.M, re.MULTILINE)
468 self.assertEqual(re.S, re.DOTALL)
469 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000470
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000471 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000472 for flag in [re.I, re.M, re.X, re.S, re.L]:
473 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000474
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000475 def test_sre_character_literals(self):
476 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
477 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
478 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
479 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
480 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
481 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
482 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
483 self.assertRaises(re.error, re.match, "\911", "")
484
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000485 def test_sre_character_class_literals(self):
486 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
487 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
488 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
489 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
490 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
491 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
492 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
493 self.assertRaises(re.error, re.match, "[\911]", "")
494
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000495 def test_bug_113254(self):
496 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
497 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
498 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
499
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000500 def test_bug_527371(self):
501 # bug described in patches 527371/672491
502 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
503 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
504 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
505 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
506 self.assertEqual(re.match("((a))", "a").lastindex, 1)
507
508 def test_bug_545855(self):
509 # bug 545855 -- This pattern failed to cause a compile error as it
510 # should, instead provoking a TypeError.
511 self.assertRaises(re.error, re.compile, 'foo[a-')
512
513 def test_bug_418626(self):
514 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
515 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
516 # pattern '*?' on a long string.
517 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
518 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
519 20003)
520 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000521 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000522 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000523 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000524
525 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000526 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000527 self.assertEqual(re.compile(pat) and 1, 1)
528
Skip Montanaro1e703c62003-04-25 15:40:28 +0000529 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000530 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000531 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000532 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
533 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
534 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000535
536 def test_scanner(self):
537 def s_ident(scanner, token): return token
538 def s_operator(scanner, token): return "op%s" % token
539 def s_float(scanner, token): return float(token)
540 def s_int(scanner, token): return int(token)
541
542 scanner = Scanner([
543 (r"[a-zA-Z_]\w*", s_ident),
544 (r"\d+\.\d*", s_float),
545 (r"\d+", s_int),
546 (r"=|\+|-|\*|/", s_operator),
547 (r"\s+", None),
548 ])
549
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000550 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
551
Skip Montanaro1e703c62003-04-25 15:40:28 +0000552 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
553 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
554 'op+', 'bar'], ''))
555
Skip Montanaro5ba00542003-04-25 16:00:14 +0000556 def test_bug_448951(self):
557 # bug 448951 (similar to 429357, but with single char match)
558 # (Also test greedy matches.)
559 for op in '','?','*':
560 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
561 (None, None))
562 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
563 ('a:', 'a'))
564
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000565 def test_bug_725106(self):
566 # capturing groups in alternatives in repeats
567 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
568 ('b', 'a'))
569 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
570 ('c', 'b'))
571 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
572 ('b', None))
573 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
574 ('b', None))
575 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
576 ('b', 'a'))
577 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
578 ('c', 'b'))
579 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
580 ('b', None))
581 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
582 ('b', None))
583
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000584 def test_bug_725149(self):
585 # mark_stack_base restoring before restoring marks
586 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
587 ('a', None))
588 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
589 ('a', None, None))
590
Just van Rossum12723ba2003-07-02 20:03:04 +0000591 def test_bug_764548(self):
592 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000593 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000594 pat = re.compile(my_unicode("abc"))
595 self.assertEqual(pat.match("xyz"), None)
596
Skip Montanaro5ba00542003-04-25 16:00:14 +0000597 def test_finditer(self):
598 iter = re.finditer(r":+", "a:b::c:::d")
599 self.assertEqual([item.group(0) for item in iter],
600 [":", "::", ":::"])
601
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000602 # XXX This needs to be restored for str vs. bytes.
603## def test_bug_926075(self):
604## self.assert_(re.compile('bug_926075') is not
605## re.compile(str8('bug_926075')))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000606
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000607 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000608 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000609 self.assertEqual(re.compile(pattern).split("a.b.c"),
610 ['a','b','c'])
611
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000612 def test_bug_581080(self):
613 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000614 self.assertEqual(next(iter).span(), (1,2))
615 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000616
617 scanner = re.compile(r"\s").scanner("a b")
618 self.assertEqual(scanner.search().span(), (1, 2))
619 self.assertEqual(scanner.search(), None)
620
621 def test_bug_817234(self):
622 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000623 self.assertEqual(next(iter).span(), (0, 4))
624 self.assertEqual(next(iter).span(), (4, 4))
625 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000626
Guido van Rossumd8faa362007-04-27 19:54:29 +0000627 def test_empty_array(self):
628 # SF buf 1647541
629 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000630 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000631 a = array.array(typecode)
632 self.assertEqual(re.compile("bla").match(a), None)
633 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000634
Christian Heimes072c0f12008-01-03 23:01:04 +0000635 def test_inline_flags(self):
636 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000637 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
638 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000639
640 p = re.compile(upper_char, re.I | re.U)
641 q = p.match(lower_char)
642 self.assertNotEqual(q, None)
643
644 p = re.compile(lower_char, re.I | re.U)
645 q = p.match(upper_char)
646 self.assertNotEqual(q, None)
647
648 p = re.compile('(?i)' + upper_char, re.U)
649 q = p.match(lower_char)
650 self.assertNotEqual(q, None)
651
652 p = re.compile('(?i)' + lower_char, re.U)
653 q = p.match(upper_char)
654 self.assertNotEqual(q, None)
655
656 p = re.compile('(?iu)' + upper_char)
657 q = p.match(lower_char)
658 self.assertNotEqual(q, None)
659
660 p = re.compile('(?iu)' + lower_char)
661 q = p.match(upper_char)
662 self.assertNotEqual(q, None)
663
Christian Heimes25bb7832008-01-11 16:17:00 +0000664 def test_dollar_matches_twice(self):
665 "$ matches the end of string, and just before the terminating \n"
666 pattern = re.compile('$')
667 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
668 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
669 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
670
671 pattern = re.compile('$', re.MULTILINE)
672 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
673 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
674 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
675
Christian Heimes072c0f12008-01-03 23:01:04 +0000676
def _group_or_marker(match, key):
    """Return match.group(key) in a form the result expressions can use.

    The expected-result expressions concatenate group values as strings,
    so a group that did not take part in the match is reported as the
    string "None" and a group that does not exist as the string "Error".
    """
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    return "None" if value is None else value


def run_re_tests():
    """Run the table-driven cases from test.re_tests, printing discrepancies.

    Each case is either a 3-tuple (pattern, string, outcome) or a
    5-tuple that additionally carries an expression to evaluate against
    the match and the value that expression should produce.  Problems
    are printed to stdout instead of being raised, so one failing case
    does not stop the run.

    Raises:
        ValueError: if a test tuple has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception
            # subclasses, so they propagate with their traceback intact.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no result for this case; carrying on would check
            # a stale value left over from an earlier iteration.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_or_marker(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_or_marker(result, name)
            # NOTE: eval() is applied to expressions shipped in
            # test.re_tests; it must never be fed external input.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match on a unicode string, and check that it
        # still succeeds.
        # NOTE(review): str(s, "latin-1") raises TypeError when s is
        # already a str, in which case every check below is skipped for
        # this case -- confirm whether that is still intended.
        try:
            result = obj.search(str(s, "latin-1"))
            if result is None:
                print('=== Fails on unicode match', t)
        except (NameError, TypeError):
            continue

        # Try the match on a unicode pattern, and check that it
        # still succeeds.
        obj = re.compile(str(pattern, "latin-1"))
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.LOCALE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000800
def test_main():
    """Run the ReTests unit tests, then the table-driven run_re_tests()."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000804
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()