blob: c4cc8208ede0156a8fb3687add8c0f34a9c912f7 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Brett Cannon672237d2008-09-09 00:49:16 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00006from re import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
Just van Rossum6802c6e2003-07-02 14:36:59 +000012# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
14# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
Skip Montanaro8ed06da2003-04-24 19:43:18 +000038 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000039 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Guido van Rossum1ff91d92007-09-10 22:02:25 +000086 def test_bug_1140(self):
87 # re.sub(x, y, u'') should return u'', not '', and
88 # re.sub(x, y, '') should return '', not u''.
89 # Also:
90 # re.sub(x, y, unicode(x)) should return unicode(y), and
91 # re.sub(x, y, str(x)) should return
92 # str(y) if isinstance(y, str) else unicode(y).
93 for x in 'x', u'x':
94 for y in 'y', u'y':
95 z = re.sub(x, y, u'')
96 self.assertEqual(z, u'')
97 self.assertEqual(type(z), unicode)
98 #
99 z = re.sub(x, y, '')
100 self.assertEqual(z, '')
101 self.assertEqual(type(z), str)
102 #
103 z = re.sub(x, y, unicode(x))
104 self.assertEqual(z, y)
105 self.assertEqual(type(z), unicode)
106 #
107 z = re.sub(x, y, str(x))
108 self.assertEqual(z, y)
109 self.assertEqual(type(z), type(y))
110
Raymond Hettinger80016c92007-12-19 18:13:31 +0000111 def test_bug_1661(self):
112 # Verify that flags do not get silently ignored with compiled patterns
113 pattern = re.compile('.')
114 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
115 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.compile, pattern, re.I)
118
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000119 def test_bug_3629(self):
120 # A regex that triggered a bug in the sre-code validator
121 re.compile("(?P<quote>)(?(quote))")
122
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000123 def test_sub_template_numeric_escape(self):
124 # bug 776311 and friends
125 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
126 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
127 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
128 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
129 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
130 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
131 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
132
133 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
134 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
135
136 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
137 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
138 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
139 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
140 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
141
142 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
143 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000144
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000145 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
153 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
154 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
155 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
156 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
157
158 # in python2.3 (etc), these loop endlessly in sre_parser.py
159 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
160 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
161 'xz8')
162 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
163 'xza')
164
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000165 def test_qualified_re_sub(self):
166 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
167 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000168
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000169 def test_bug_114660(self):
170 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
171 'hello there')
172
173 def test_bug_462270(self):
174 # Test for empty sub() behaviour, see SF bug #462270
175 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
176 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
177
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000178 def test_symbolic_refs(self):
179 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
180 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
181 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
182 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
183 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
184 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
185 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
186 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000187 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000188
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000189 def test_re_subn(self):
190 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
191 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
192 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
193 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
194 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000195
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000196 def test_re_split(self):
197 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
198 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
199 self.assertEqual(re.split("(:*)", ":a:b::c"),
200 ['', ':', 'a', ':', 'b', '::', 'c'])
201 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
202 self.assertEqual(re.split("(:)*", ":a:b::c"),
203 ['', ':', 'a', ':', 'b', ':', 'c'])
204 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
205 ['', ':', 'a', ':b::', 'c'])
206 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
207 ['', None, ':', 'a', None, ':', '', 'b', None, '',
208 None, '::', 'c'])
209 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
210 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000211
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000212 def test_qualified_re_split(self):
213 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
214 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
215 self.assertEqual(re.split("(:)", ":a:b::c", 2),
216 ['', ':', 'a', ':', 'b::c'])
217 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
218 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000219
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000220 def test_re_findall(self):
221 self.assertEqual(re.findall(":+", "abc"), [])
222 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
223 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
224 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
225 (":", ":"),
226 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000227
Skip Montanaro5ba00542003-04-25 16:00:14 +0000228 def test_bug_117612(self):
229 self.assertEqual(re.findall(r"(a|(b))", "aba"),
230 [("a", ""),("b", "b"),("a", "")])
231
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000232 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000233 self.assertEqual(re.match('a', 'a').groups(), ())
234 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
235 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
236 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
237 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000238
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000239 pat = re.compile('((a)|(b))(c)?')
240 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
241 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
242 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
243 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
244 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000245
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000246 # A single group
247 m = re.match('(a)', 'a')
248 self.assertEqual(m.group(0), 'a')
249 self.assertEqual(m.group(0), 'a')
250 self.assertEqual(m.group(1), 'a')
251 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000252
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000253 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
254 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
255 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
256 (None, 'b', None))
257 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000258
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000259 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000260 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
261 ('(', 'a'))
262 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
263 (None, 'a'))
264 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
265 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
266 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
267 ('a', 'b'))
268 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
269 (None, 'd'))
270 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
271 (None, 'd'))
272 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
273 ('a', ''))
274
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000275 # Tests for bug #1177831: exercise groups other than the first group
276 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
277 self.assertEqual(p.match('abc').groups(),
278 ('a', 'b', 'c'))
279 self.assertEqual(p.match('ad').groups(),
280 ('a', None, 'd'))
281 self.assertEqual(p.match('abd'), None)
282 self.assertEqual(p.match('ac'), None)
283
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000284
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000285 def test_re_groupref(self):
286 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
287 ('|', 'a'))
288 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
289 (None, 'a'))
290 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
291 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
292 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
293 ('a', 'a'))
294 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
295 (None, None))
296
297 def test_groupdict(self):
298 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
299 'first second').groupdict(),
300 {'first':'first', 'second':'second'})
301
302 def test_expand(self):
303 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
304 "first second")
305 .expand(r"\2 \1 \g<second> \g<first>"),
306 "second first second first")
307
308 def test_repeat_minmax(self):
309 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
310 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
311 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
312 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
313
314 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
315 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
316 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
317 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
318 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
319 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
320 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
321 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
322
323 self.assertEqual(re.match("^x{1}$", "xxx"), None)
324 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
325 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
326 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
327
328 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
329 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
330 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
331 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
332 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
333 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
334 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
335 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
336
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000337 self.assertEqual(re.match("^x{}$", "xxx"), None)
338 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
339
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000340 def test_getattr(self):
341 self.assertEqual(re.match("(a)", "a").pos, 0)
342 self.assertEqual(re.match("(a)", "a").endpos, 1)
343 self.assertEqual(re.match("(a)", "a").string, "a")
344 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
345 self.assertNotEqual(re.match("(a)", "a").re, None)
346
347 def test_special_escapes(self):
348 self.assertEqual(re.search(r"\b(b.)\b",
349 "abcd abc bcd bx").group(1), "bx")
350 self.assertEqual(re.search(r"\B(b.)\B",
351 "abc bcd bc abxd").group(1), "bx")
352 self.assertEqual(re.search(r"\b(b.)\b",
353 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
354 self.assertEqual(re.search(r"\B(b.)\B",
355 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
356 self.assertEqual(re.search(r"\b(b.)\b",
357 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
358 self.assertEqual(re.search(r"\B(b.)\B",
359 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
360 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
361 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
362 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
363 self.assertEqual(re.search(r"\b(b.)\b",
364 u"abcd abc bcd bx").group(1), "bx")
365 self.assertEqual(re.search(r"\B(b.)\B",
366 u"abc bcd bc abxd").group(1), "bx")
367 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
368 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
369 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
370 self.assertEqual(re.search(r"\d\D\w\W\s\S",
371 "1aa! a").group(0), "1aa! a")
372 self.assertEqual(re.search(r"\d\D\w\W\s\S",
373 "1aa! a", re.LOCALE).group(0), "1aa! a")
374 self.assertEqual(re.search(r"\d\D\w\W\s\S",
375 "1aa! a", re.UNICODE).group(0), "1aa! a")
376
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000377 def test_bigcharset(self):
378 self.assertEqual(re.match(u"([\u2222\u2223])",
379 u"\u2222").group(1), u"\u2222")
380 self.assertEqual(re.match(u"([\u2222\u2223])",
381 u"\u2222", re.UNICODE).group(1), u"\u2222")
382
383 def test_anyall(self):
384 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
385 "a\nb")
386 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
387 "a\n\nb")
388
389 def test_non_consuming(self):
390 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
391 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
392 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
393 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
394 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
395 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
396 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
397
398 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
399 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
400 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
401 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
402
403 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000404 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
405 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000406 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
407 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
408 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
409 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
410 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
411 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
412 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
413 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
414
415 def test_category(self):
416 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
417
418 def test_getlower(self):
419 import _sre
420 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
421 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
422 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
423
424 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
425 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
426
427 def test_not_literal(self):
428 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
429 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
430
431 def test_search_coverage(self):
432 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
433 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
434
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000435 def test_re_escape(self):
436 p=""
437 for i in range(0, 256):
438 p = p + chr(i)
439 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
440 True)
441 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000442
Skip Montanaro1e703c62003-04-25 15:40:28 +0000443 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000444 self.assertEqual(pat.match(p) is not None, True)
445 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000446
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000447 def test_pickling(self):
448 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000449 self.pickle_test(pickle)
450 import cPickle
451 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000452 # old pickles expect the _compile() reconstructor in sre module
453 import warnings
Brett Cannon672237d2008-09-09 00:49:16 +0000454 with warnings.catch_warnings():
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000455 warnings.filterwarnings("ignore", "The sre module is deprecated",
456 DeprecationWarning)
457 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000458
459 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000460 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
461 s = pickle.dumps(oldpat)
462 newpat = pickle.loads(s)
463 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000464
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000465 def test_constants(self):
466 self.assertEqual(re.I, re.IGNORECASE)
467 self.assertEqual(re.L, re.LOCALE)
468 self.assertEqual(re.M, re.MULTILINE)
469 self.assertEqual(re.S, re.DOTALL)
470 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000471
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000472 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000473 for flag in [re.I, re.M, re.X, re.S, re.L]:
474 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000475
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000476 def test_sre_character_literals(self):
477 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
478 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
479 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
480 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
481 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
482 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
483 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
484 self.assertRaises(re.error, re.match, "\911", "")
485
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000486 def test_sre_character_class_literals(self):
487 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
488 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
489 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
490 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
491 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
492 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
493 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
494 self.assertRaises(re.error, re.match, "[\911]", "")
495
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000496 def test_bug_113254(self):
497 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
498 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
499 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
500
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000501 def test_bug_527371(self):
502 # bug described in patches 527371/672491
503 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
504 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
505 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
506 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
507 self.assertEqual(re.match("((a))", "a").lastindex, 1)
508
509 def test_bug_545855(self):
510 # bug 545855 -- This pattern failed to cause a compile error as it
511 # should, instead provoking a TypeError.
512 self.assertRaises(re.error, re.compile, 'foo[a-')
513
514 def test_bug_418626(self):
515 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
516 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
517 # pattern '*?' on a long string.
518 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
519 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
520 20003)
521 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000522 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000523 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000524 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000525
526 def test_bug_612074(self):
527 pat=u"["+re.escape(u"\u2039")+u"]"
528 self.assertEqual(re.compile(pat) and 1, 1)
529
Skip Montanaro1e703c62003-04-25 15:40:28 +0000530 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000531 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000532 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000533 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
534 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
535 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000536
537 def test_scanner(self):
538 def s_ident(scanner, token): return token
539 def s_operator(scanner, token): return "op%s" % token
540 def s_float(scanner, token): return float(token)
541 def s_int(scanner, token): return int(token)
542
543 scanner = Scanner([
544 (r"[a-zA-Z_]\w*", s_ident),
545 (r"\d+\.\d*", s_float),
546 (r"\d+", s_int),
547 (r"=|\+|-|\*|/", s_operator),
548 (r"\s+", None),
549 ])
550
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000551 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
552
Skip Montanaro1e703c62003-04-25 15:40:28 +0000553 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
554 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
555 'op+', 'bar'], ''))
556
Skip Montanaro5ba00542003-04-25 16:00:14 +0000557 def test_bug_448951(self):
558 # bug 448951 (similar to 429357, but with single char match)
559 # (Also test greedy matches.)
560 for op in '','?','*':
561 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
562 (None, None))
563 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
564 ('a:', 'a'))
565
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000566 def test_bug_725106(self):
567 # capturing groups in alternatives in repeats
568 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
569 ('b', 'a'))
570 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
571 ('c', 'b'))
572 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
573 ('b', None))
574 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
575 ('b', None))
576 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
577 ('b', 'a'))
578 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
579 ('c', 'b'))
580 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
581 ('b', None))
582 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
583 ('b', None))
584
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000585 def test_bug_725149(self):
586 # mark_stack_base restoring before restoring marks
587 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
588 ('a', None))
589 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
590 ('a', None, None))
591
Just van Rossum12723ba2003-07-02 20:03:04 +0000592 def test_bug_764548(self):
593 # bug 764548, re.compile() barfs on str/unicode subclasses
594 try:
595 unicode
596 except NameError:
597 return # no problem if we have no unicode
598 class my_unicode(unicode): pass
599 pat = re.compile(my_unicode("abc"))
600 self.assertEqual(pat.match("xyz"), None)
601
Skip Montanaro5ba00542003-04-25 16:00:14 +0000602 def test_finditer(self):
603 iter = re.finditer(r":+", "a:b::c:::d")
604 self.assertEqual([item.group(0) for item in iter],
605 [":", "::", ":::"])
606
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000607 def test_bug_926075(self):
608 try:
609 unicode
610 except NameError:
611 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000612 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000613 re.compile(eval("u'bug_926075'")))
614
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000615 def test_bug_931848(self):
616 try:
617 unicode
618 except NameError:
619 pass
620 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
621 self.assertEqual(re.compile(pattern).split("a.b.c"),
622 ['a','b','c'])
623
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000624 def test_bug_581080(self):
625 iter = re.finditer(r"\s", "a b")
626 self.assertEqual(iter.next().span(), (1,2))
627 self.assertRaises(StopIteration, iter.next)
628
629 scanner = re.compile(r"\s").scanner("a b")
630 self.assertEqual(scanner.search().span(), (1, 2))
631 self.assertEqual(scanner.search(), None)
632
633 def test_bug_817234(self):
634 iter = re.finditer(r".*", "asdf")
635 self.assertEqual(iter.next().span(), (0, 4))
636 self.assertEqual(iter.next().span(), (4, 4))
637 self.assertRaises(StopIteration, iter.next)
638
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000639 def test_bug_6561(self):
640 # '\d' should match characters in Unicode category 'Nd'
641 # (Number, Decimal Digit), but not those in 'Nl' (Number,
642 # Letter) or 'No' (Number, Other).
643 decimal_digits = [
644 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
645 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
646 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
647 ]
648 for x in decimal_digits:
649 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
650
651 not_decimal_digits = [
652 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
653 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
654 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
655 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
656 ]
657 for x in not_decimal_digits:
658 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
659
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000660 def test_empty_array(self):
661 # SF buf 1647541
662 import array
663 for typecode in 'cbBuhHiIlLfd':
664 a = array.array(typecode)
665 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000666 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000667
Guido van Rossumae04c332008-01-03 19:12:44 +0000668 def test_inline_flags(self):
669 # Bug #1700
670 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
671 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
672
673 p = re.compile(upper_char, re.I | re.U)
674 q = p.match(lower_char)
675 self.assertNotEqual(q, None)
676
677 p = re.compile(lower_char, re.I | re.U)
678 q = p.match(upper_char)
679 self.assertNotEqual(q, None)
680
681 p = re.compile('(?i)' + upper_char, re.U)
682 q = p.match(lower_char)
683 self.assertNotEqual(q, None)
684
685 p = re.compile('(?i)' + lower_char, re.U)
686 q = p.match(upper_char)
687 self.assertNotEqual(q, None)
688
689 p = re.compile('(?iu)' + upper_char)
690 q = p.match(lower_char)
691 self.assertNotEqual(q, None)
692
693 p = re.compile('(?iu)' + lower_char)
694 q = p.match(upper_char)
695 self.assertNotEqual(q, None)
696
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000697 def test_dollar_matches_twice(self):
698 "$ matches the end of string, and just before the terminating \n"
699 pattern = re.compile('$')
700 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
701 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
702 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
703
704 pattern = re.compile('$', re.MULTILINE)
705 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
706 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
707 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
708
Guido van Rossumae04c332008-01-03 19:12:44 +0000709
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000710def run_re_tests():
711 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
712 if verbose:
713 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000714 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000715 # To save time, only run the first and last 10 tests
716 #tests = tests[:10] + tests[-10:]
717 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000718
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000719 for t in tests:
720 sys.stdout.flush()
721 pattern = s = outcome = repl = expected = None
722 if len(t) == 5:
723 pattern, s, outcome, repl, expected = t
724 elif len(t) == 3:
725 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000726 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000727 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
728
Guido van Rossum41360a41998-03-26 19:42:58 +0000729 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000730 obj = re.compile(pattern)
731 except re.error:
732 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000733 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000734 print '=== Syntax error:', t
735 except KeyboardInterrupt: raise KeyboardInterrupt
736 except:
737 print '*** Unexpected error ***', t
738 if verbose:
739 traceback.print_exc(file=sys.stdout)
740 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000741 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000742 result = obj.search(s)
743 except re.error, msg:
744 print '=== Unexpected exception', t, repr(msg)
745 if outcome == SYNTAX_ERROR:
746 # This should have been a syntax error; forget it.
747 pass
748 elif outcome == FAIL:
749 if result is None: pass # No match, as expected
750 else: print '=== Succeeded incorrectly', t
751 elif outcome == SUCCEED:
752 if result is not None:
753 # Matched, as expected, so now we compute the
754 # result string and compare it to our expected result.
755 start, end = result.span(0)
756 vardict={'found': result.group(0),
757 'groups': result.group(),
758 'flags': result.re.flags}
759 for i in range(1, 100):
760 try:
761 gi = result.group(i)
762 # Special hack because else the string concat fails:
763 if gi is None:
764 gi = "None"
765 except IndexError:
766 gi = "Error"
767 vardict['g%d' % i] = gi
768 for i in result.re.groupindex.keys():
769 try:
770 gi = result.group(i)
771 if gi is None:
772 gi = "None"
773 except IndexError:
774 gi = "Error"
775 vardict[i] = gi
776 repl = eval(repl, vardict)
777 if repl != expected:
778 print '=== grouping error', t,
779 print repr(repl) + ' should be ' + repr(expected)
780 else:
781 print '=== Failed incorrectly', t
782
783 # Try the match on a unicode string, and check that it
784 # still succeeds.
785 try:
786 result = obj.search(unicode(s, "latin-1"))
787 if result is None:
788 print '=== Fails on unicode match', t
789 except NameError:
790 continue # 1.5.2
791 except TypeError:
792 continue # unicode test case
793
794 # Try the match on a unicode pattern, and check that it
795 # still succeeds.
796 obj=re.compile(unicode(pattern, "latin-1"))
797 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000798 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000799 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000800
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000801 # Try the match with the search area limited to the extent
802 # of the match and see if it still succeeds. \B will
803 # break (because it won't match at the end or start of a
804 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000805
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000806 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
807 and result is not None:
808 obj = re.compile(pattern)
809 result = obj.search(s, result.start(0), result.end(0) + 1)
810 if result is None:
811 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000812
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000813 # Try the match with IGNORECASE enabled, and check that it
814 # still succeeds.
815 obj = re.compile(pattern, re.IGNORECASE)
816 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000817 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000818 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000819
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000820 # Try the match with LOCALE enabled, and check that it
821 # still succeeds.
822 obj = re.compile(pattern, re.LOCALE)
823 result = obj.search(s)
824 if result is None:
825 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000826
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000827 # Try the match with UNICODE locale enabled, and check
828 # that it still succeeds.
829 obj = re.compile(pattern, re.UNICODE)
830 result = obj.search(s)
831 if result is None:
832 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000833
def test_main():
    """Run the ReTests unittest cases, then the table-driven re_tests suite."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000837
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()