blob: 17370bb45642da9e81faab261140a4030315e3b2 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Benjamin Petersonee8712c2008-05-20 21:35:26 +00004from test.support import verbose, run_unittest, catch_warning
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00006from re import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
def bump_num(self, matchobj):
    """Return the whole matched decimal text, incremented by one, as a str."""
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000086 def test_bug_1661(self):
87 # Verify that flags do not get silently ignored with compiled patterns
88 pattern = re.compile('.')
89 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
90 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
91 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
92 self.assertRaises(ValueError, re.compile, pattern, re.I)
93
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +000094 def test_sub_template_numeric_escape(self):
95 # bug 776311 and friends
96 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
97 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
98 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
99 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
100 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
101 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
102 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
103
104 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
105 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
106
107 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
108 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
109 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
110 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
111 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
112
113 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
114 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000115
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000116 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
117 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
118 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
119 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
120 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
121 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
122 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
123 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
124 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
125 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
126 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
127 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
128
129 # in python2.3 (etc), these loop endlessly in sre_parser.py
130 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
131 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
132 'xz8')
133 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
134 'xza')
135
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000136 def test_qualified_re_sub(self):
137 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
138 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000139
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000140 def test_bug_114660(self):
141 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
142 'hello there')
143
144 def test_bug_462270(self):
145 # Test for empty sub() behaviour, see SF bug #462270
146 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
147 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
148
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000149 def test_symbolic_refs(self):
150 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
151 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
152 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
153 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
154 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
155 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
156 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
157 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000158 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000159
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000160 def test_re_subn(self):
161 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
162 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
163 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
164 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
165 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000166
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000167 def test_re_split(self):
168 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
169 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
170 self.assertEqual(re.split("(:*)", ":a:b::c"),
171 ['', ':', 'a', ':', 'b', '::', 'c'])
172 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
173 self.assertEqual(re.split("(:)*", ":a:b::c"),
174 ['', ':', 'a', ':', 'b', ':', 'c'])
175 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
176 ['', ':', 'a', ':b::', 'c'])
177 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
178 ['', None, ':', 'a', None, ':', '', 'b', None, '',
179 None, '::', 'c'])
180 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
181 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000182
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000183 def test_qualified_re_split(self):
184 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
185 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
186 self.assertEqual(re.split("(:)", ":a:b::c", 2),
187 ['', ':', 'a', ':', 'b::c'])
188 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
189 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000190
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000191 def test_re_findall(self):
192 self.assertEqual(re.findall(":+", "abc"), [])
193 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
194 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
195 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
196 (":", ":"),
197 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000198
Skip Montanaro5ba00542003-04-25 16:00:14 +0000199 def test_bug_117612(self):
200 self.assertEqual(re.findall(r"(a|(b))", "aba"),
201 [("a", ""),("b", "b"),("a", "")])
202
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000203 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000204 self.assertEqual(re.match('a', 'a').groups(), ())
205 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
206 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
207 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
208 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000209
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000210 pat = re.compile('((a)|(b))(c)?')
211 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
212 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
213 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
214 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
215 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000216
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000217 # A single group
218 m = re.match('(a)', 'a')
219 self.assertEqual(m.group(0), 'a')
220 self.assertEqual(m.group(0), 'a')
221 self.assertEqual(m.group(1), 'a')
222 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000223
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000224 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
225 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
226 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
227 (None, 'b', None))
228 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000229
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000230 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000231 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
232 ('(', 'a'))
233 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
234 (None, 'a'))
235 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
236 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
237 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
238 ('a', 'b'))
239 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
240 (None, 'd'))
241 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
242 (None, 'd'))
243 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
244 ('a', ''))
245
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000246 # Tests for bug #1177831: exercise groups other than the first group
247 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
248 self.assertEqual(p.match('abc').groups(),
249 ('a', 'b', 'c'))
250 self.assertEqual(p.match('ad').groups(),
251 ('a', None, 'd'))
252 self.assertEqual(p.match('abd'), None)
253 self.assertEqual(p.match('ac'), None)
254
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000255
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000256 def test_re_groupref(self):
257 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
258 ('|', 'a'))
259 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
260 (None, 'a'))
261 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
262 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
263 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
264 ('a', 'a'))
265 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
266 (None, None))
267
268 def test_groupdict(self):
269 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
270 'first second').groupdict(),
271 {'first':'first', 'second':'second'})
272
273 def test_expand(self):
274 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
275 "first second")
276 .expand(r"\2 \1 \g<second> \g<first>"),
277 "second first second first")
278
279 def test_repeat_minmax(self):
280 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
281 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
282 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
283 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
284
285 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
286 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
287 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
288 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
289 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
290 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
291 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
292 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
293
294 self.assertEqual(re.match("^x{1}$", "xxx"), None)
295 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
296 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
297 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
298
299 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
300 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
301 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
302 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
303 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
304 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
305 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
306 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
307
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000308 self.assertEqual(re.match("^x{}$", "xxx"), None)
309 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
310
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000311 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000312 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000313 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000314 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
315 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
316 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
317 {'first': 1, 'other': 2})
318
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000319 self.assertEqual(re.match("(a)", "a").pos, 0)
320 self.assertEqual(re.match("(a)", "a").endpos, 1)
321 self.assertEqual(re.match("(a)", "a").string, "a")
322 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
323 self.assertNotEqual(re.match("(a)", "a").re, None)
324
325 def test_special_escapes(self):
326 self.assertEqual(re.search(r"\b(b.)\b",
327 "abcd abc bcd bx").group(1), "bx")
328 self.assertEqual(re.search(r"\B(b.)\B",
329 "abc bcd bc abxd").group(1), "bx")
330 self.assertEqual(re.search(r"\b(b.)\b",
331 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
332 self.assertEqual(re.search(r"\B(b.)\B",
333 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
334 self.assertEqual(re.search(r"\b(b.)\b",
335 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
336 self.assertEqual(re.search(r"\B(b.)\B",
337 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
338 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
339 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
340 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
341 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000342 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000343 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000344 "abc bcd bc abxd").group(1), "bx")
345 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
346 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
347 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000348 self.assertEqual(re.search(r"\d\D\w\W\s\S",
349 "1aa! a").group(0), "1aa! a")
350 self.assertEqual(re.search(r"\d\D\w\W\s\S",
351 "1aa! a", re.LOCALE).group(0), "1aa! a")
352 self.assertEqual(re.search(r"\d\D\w\W\s\S",
353 "1aa! a", re.UNICODE).group(0), "1aa! a")
354
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000355 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000356 self.assertEqual(re.match("([\u2222\u2223])",
357 "\u2222").group(1), "\u2222")
358 self.assertEqual(re.match("([\u2222\u2223])",
359 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000360
361 def test_anyall(self):
362 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
363 "a\nb")
364 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
365 "a\n\nb")
366
367 def test_non_consuming(self):
368 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
369 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
370 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
371 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
372 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
373 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
374 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
375
376 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
377 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
378 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
379 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
380
381 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000382 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
383 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000384 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
385 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
386 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
387 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
388 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
389 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
390 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
391 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
392
393 def test_category(self):
394 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
395
396 def test_getlower(self):
397 import _sre
398 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
399 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
400 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
401
402 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000403 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000404
405 def test_not_literal(self):
406 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
407 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
408
409 def test_search_coverage(self):
410 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
411 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
412
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000413 def test_re_escape(self):
414 p=""
415 for i in range(0, 256):
416 p = p + chr(i)
417 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
418 True)
419 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000420
Skip Montanaro1e703c62003-04-25 15:40:28 +0000421 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000422 self.assertEqual(pat.match(p) is not None, True)
423 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000424
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle-like
    # module and require the restored object to compare equal.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000430
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000431 def test_constants(self):
432 self.assertEqual(re.I, re.IGNORECASE)
433 self.assertEqual(re.L, re.LOCALE)
434 self.assertEqual(re.M, re.MULTILINE)
435 self.assertEqual(re.S, re.DOTALL)
436 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000437
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000438 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000439 for flag in [re.I, re.M, re.X, re.S, re.L]:
440 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000441
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000442 def test_sre_character_literals(self):
443 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
444 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
445 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
446 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
447 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
448 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
449 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
450 self.assertRaises(re.error, re.match, "\911", "")
451
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000452 def test_sre_character_class_literals(self):
453 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
454 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
455 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
456 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
457 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
458 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
459 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
460 self.assertRaises(re.error, re.match, "[\911]", "")
461
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000462 def test_bug_113254(self):
463 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
464 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
465 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
466
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000467 def test_bug_527371(self):
468 # bug described in patches 527371/672491
469 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
470 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
471 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
472 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
473 self.assertEqual(re.match("((a))", "a").lastindex, 1)
474
475 def test_bug_545855(self):
476 # bug 545855 -- This pattern failed to cause a compile error as it
477 # should, instead provoking a TypeError.
478 self.assertRaises(re.error, re.compile, 'foo[a-')
479
480 def test_bug_418626(self):
481 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
482 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
483 # pattern '*?' on a long string.
484 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
485 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
486 20003)
487 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000488 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000489 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000490 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000491
492 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000493 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000494 self.assertEqual(re.compile(pat) and 1, 1)
495
Skip Montanaro1e703c62003-04-25 15:40:28 +0000496 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000497 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000498 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000499 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
500 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
501 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000502
503 def test_scanner(self):
504 def s_ident(scanner, token): return token
505 def s_operator(scanner, token): return "op%s" % token
506 def s_float(scanner, token): return float(token)
507 def s_int(scanner, token): return int(token)
508
509 scanner = Scanner([
510 (r"[a-zA-Z_]\w*", s_ident),
511 (r"\d+\.\d*", s_float),
512 (r"\d+", s_int),
513 (r"=|\+|-|\*|/", s_operator),
514 (r"\s+", None),
515 ])
516
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000517 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
518
Skip Montanaro1e703c62003-04-25 15:40:28 +0000519 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
520 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
521 'op+', 'bar'], ''))
522
Skip Montanaro5ba00542003-04-25 16:00:14 +0000523 def test_bug_448951(self):
524 # bug 448951 (similar to 429357, but with single char match)
525 # (Also test greedy matches.)
526 for op in '','?','*':
527 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
528 (None, None))
529 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
530 ('a:', 'a'))
531
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000532 def test_bug_725106(self):
533 # capturing groups in alternatives in repeats
534 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
535 ('b', 'a'))
536 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
537 ('c', 'b'))
538 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
539 ('b', None))
540 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
541 ('b', None))
542 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
543 ('b', 'a'))
544 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
545 ('c', 'b'))
546 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
547 ('b', None))
548 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
549 ('b', None))
550
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000551 def test_bug_725149(self):
552 # mark_stack_base restoring before restoring marks
553 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
554 ('a', None))
555 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
556 ('a', None, None))
557
Just van Rossum12723ba2003-07-02 20:03:04 +0000558 def test_bug_764548(self):
559 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000560 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000561 pat = re.compile(my_unicode("abc"))
562 self.assertEqual(pat.match("xyz"), None)
563
Skip Montanaro5ba00542003-04-25 16:00:14 +0000564 def test_finditer(self):
565 iter = re.finditer(r":+", "a:b::c:::d")
566 self.assertEqual([item.group(0) for item in iter],
567 [":", "::", ":::"])
568
Thomas Wouters40a088d2008-03-18 20:19:54 +0000569 def test_bug_926075(self):
570 self.assert_(re.compile('bug_926075') is not
571 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000572
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000573 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000574 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000575 self.assertEqual(re.compile(pattern).split("a.b.c"),
576 ['a','b','c'])
577
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000578 def test_bug_581080(self):
579 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000580 self.assertEqual(next(iter).span(), (1,2))
581 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000582
583 scanner = re.compile(r"\s").scanner("a b")
584 self.assertEqual(scanner.search().span(), (1, 2))
585 self.assertEqual(scanner.search(), None)
586
587 def test_bug_817234(self):
588 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000589 self.assertEqual(next(iter).span(), (0, 4))
590 self.assertEqual(next(iter).span(), (4, 4))
591 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000592
Guido van Rossumd8faa362007-04-27 19:54:29 +0000593 def test_empty_array(self):
594 # SF buf 1647541
595 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000596 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000597 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000598 self.assertEqual(re.compile(b"bla").match(a), None)
599 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000600
Christian Heimes072c0f12008-01-03 23:01:04 +0000601 def test_inline_flags(self):
602 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000603 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
604 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000605
606 p = re.compile(upper_char, re.I | re.U)
607 q = p.match(lower_char)
608 self.assertNotEqual(q, None)
609
610 p = re.compile(lower_char, re.I | re.U)
611 q = p.match(upper_char)
612 self.assertNotEqual(q, None)
613
614 p = re.compile('(?i)' + upper_char, re.U)
615 q = p.match(lower_char)
616 self.assertNotEqual(q, None)
617
618 p = re.compile('(?i)' + lower_char, re.U)
619 q = p.match(upper_char)
620 self.assertNotEqual(q, None)
621
622 p = re.compile('(?iu)' + upper_char)
623 q = p.match(lower_char)
624 self.assertNotEqual(q, None)
625
626 p = re.compile('(?iu)' + lower_char)
627 q = p.match(upper_char)
628 self.assertNotEqual(q, None)
629
Christian Heimes25bb7832008-01-11 16:17:00 +0000630 def test_dollar_matches_twice(self):
631 "$ matches the end of string, and just before the terminating \n"
632 pattern = re.compile('$')
633 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
634 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
635 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
636
637 pattern = re.compile('$', re.MULTILINE)
638 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
639 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
640 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
641
Antoine Pitroufd036452008-08-19 17:56:33 +0000642 def test_bytes_str_mixing(self):
643 # Mixing str and bytes is disallowed
644 pat = re.compile('.')
645 bpat = re.compile(b'.')
646 self.assertRaises(TypeError, pat.match, b'b')
647 self.assertRaises(TypeError, bpat.match, 'b')
648 self.assertRaises(TypeError, pat.sub, b'b', 'c')
649 self.assertRaises(TypeError, pat.sub, 'b', b'c')
650 self.assertRaises(TypeError, pat.sub, b'b', b'c')
651 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
652 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
653 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
654
655 def test_ascii_and_unicode_flag(self):
656 # String patterns
657 for flags in (0, re.UNICODE):
658 pat = re.compile('\xc0', flags | re.IGNORECASE)
659 self.assertNotEqual(pat.match('\xe0'), None)
660 pat = re.compile('\w', flags)
661 self.assertNotEqual(pat.match('\xe0'), None)
662 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
663 self.assertEqual(pat.match('\xe0'), None)
664 pat = re.compile('(?a)\xc0', re.IGNORECASE)
665 self.assertEqual(pat.match('\xe0'), None)
666 pat = re.compile('\w', re.ASCII)
667 self.assertEqual(pat.match('\xe0'), None)
668 pat = re.compile('(?a)\w')
669 self.assertEqual(pat.match('\xe0'), None)
670 # Bytes patterns
671 for flags in (0, re.ASCII):
672 pat = re.compile(b'\xc0', re.IGNORECASE)
673 self.assertEqual(pat.match(b'\xe0'), None)
674 pat = re.compile(b'\w')
675 self.assertEqual(pat.match(b'\xe0'), None)
676 # Incompatibilities
677 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
678 self.assertRaises(ValueError, re.compile, b'(?u)\w')
679 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
680 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
681 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
682 self.assertRaises(ValueError, re.compile, '(?au)\w')
683
Christian Heimes072c0f12008-01-03 23:01:04 +0000684
def run_re_tests():
    """Run the table-driven checks from test.re_tests.

    Each entry of ``tests`` is either a 3-tuple ``(pattern, s, outcome)`` or a
    5-tuple ``(pattern, s, outcome, repl, expected)``; any other length raises
    ValueError.  Problems are reported by printing a line to stdout rather
    than by raising, so this function does not itself fail a test run.
    """
    # NOTE(review): `benchmarks` is imported but not used in this function.
    from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')
    else:
        # To save time, only run the first and last 10 tests
        #tests = tests[:10] + tests[-10:]
        pass

    for t in tests:
        sys.stdout.flush()
        # Reset every field so a 3-tuple leaves repl/expected as None.
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
            else:
                print('=== Syntax error:', t)
        except KeyboardInterrupt: raise KeyboardInterrupt
        except:
            # Any other exception during compilation is reported, not raised.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
        else:
            # The pattern compiled; now run it against the test string.
            try:
                result = obj.search(s)
            except re.error as msg:
                # NOTE(review): execution continues after this print, so the
                # checks below read `result` from a previous iteration (or an
                # unbound name on the first one) -- confirm this is intended.
                print('=== Unexpected exception', t, repr(msg))
            if outcome == SYNTAX_ERROR:
                # This should have been a syntax error; forget it.
                pass
            elif outcome == FAIL:
                if result is None: pass   # No match, as expected
                else: print('=== Succeeded incorrectly', t)
            elif outcome == SUCCEED:
                if result is not None:
                    # Matched, as expected, so now we compute the
                    # result string and compare it to our expected result.
                    # NOTE(review): start and end are not used afterwards.
                    start, end = result.span(0)
                    # Names made available to the `repl` expression below.
                    vardict={'found': result.group(0),
                             'groups': result.group(),
                             'flags': result.re.flags}
                    # Expose groups 1..99 as g1..g99; groups the pattern does
                    # not have are recorded as the string "Error".
                    for i in range(1, 100):
                        try:
                            gi = result.group(i)
                            # Special hack because else the string concat fails:
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict['g%d' % i] = gi
                    # Expose named groups under their own names as well.
                    for i in result.re.groupindex.keys():
                        try:
                            gi = result.group(i)
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict[i] = gi
                    # `repl` is evaluated as a Python expression with vardict
                    # as its globals.  NOTE(review): this relies on the test
                    # table being trusted input.
                    repl = eval(repl, vardict)
                    if repl != expected:
                        print('=== grouping error', t, end=' ')
                        print(repr(repl) + ' should be ' + repr(expected))
                else:
                    print('=== Failed incorrectly', t)

                # Try the match with both pattern and string converted to
                # bytes, and check that it still succeeds.
                try:
                    bpat = bytes(pattern, "ascii")
                    bs = bytes(s, "ascii")
                except UnicodeEncodeError:
                    # skip non-ascii tests
                    pass
                else:
                    try:
                        bpat = re.compile(bpat)
                    except Exception:
                        print('=== Fails on bytes pattern compile', t)
                        if verbose:
                            traceback.print_exc(file=sys.stdout)
                    else:
                        bytes_result = bpat.search(bs)
                        if bytes_result is None:
                            print('=== Fails on bytes pattern match', t)

                # Try the match with the search area limited to the extent
                # of the match and see if it still succeeds.  \B will
                # break (because it won't match at the end or start of a
                # string), so we'll ignore patterns that feature it.

                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                               and result is not None:
                    obj = re.compile(pattern)
                    result = obj.search(s, result.start(0), result.end(0) + 1)
                    if result is None:
                        print('=== Failed on range-limited match', t)

                # Try the match with IGNORECASE enabled, and check that it
                # still succeeds.
                obj = re.compile(pattern, re.IGNORECASE)
                result = obj.search(s)
                if result is None:
                    print('=== Fails on case-insensitive match', t)

                # Try the match with LOCALE enabled, and check that it
                # still succeeds.
                # Patterns containing an inline (?u) flag are skipped here.
                if '(?u)' not in pattern:
                    obj = re.compile(pattern, re.LOCALE)
                    result = obj.search(s)
                    if result is None:
                        print('=== Fails on locale-sensitive match', t)

                # Try the match with UNICODE locale enabled, and check
                # that it still succeeds.
                obj = re.compile(pattern, re.UNICODE)
                result = obj.search(s)
                if result is None:
                    print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000811
def test_main():
    """Run the ReTests unittest cases, then the table-driven re_tests checks."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000815
# Run the full suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()