blob: 07bc63b277a76568ef34f303f549ec4d0d12aa9d [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Walter Dörwald21d3a322003-05-01 17:45:56 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Skip Montanaro1e703c62003-04-25 15:40:28 +00006from sre import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
Just van Rossum6802c6e2003-07-02 14:36:59 +000012# WARNING: Don't change details in these tests if you don't know
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000013# what you're doing. Some of these tests were carefully modeled to
14# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
Skip Montanaro8ed06da2003-04-24 19:43:18 +000038 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000039 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +000086 def test_sub_template_numeric_escape(self):
87 # bug 776311 and friends
88 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
89 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
90 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
91 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
92 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
93 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
94 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
95
96 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
97 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
98
99 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
100 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
101 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
102 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
103 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
104
105 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
106 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000107
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000108 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
109 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
110 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
111 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
112 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
113 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
114 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
115 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
116 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
117 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
118 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
119 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
120
121 # in python2.3 (etc), these loop endlessly in sre_parser.py
122 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
123 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
124 'xz8')
125 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
126 'xza')
127
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000128 def test_qualified_re_sub(self):
129 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
130 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000131
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000132 def test_bug_114660(self):
133 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
134 'hello there')
135
136 def test_bug_462270(self):
137 # Test for empty sub() behaviour, see SF bug #462270
138 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
139 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
140
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000141 def test_symbolic_refs(self):
142 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
143 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
144 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
145 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
146 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
147 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
148 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
149 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000150 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000151
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000152 def test_re_subn(self):
153 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
154 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
155 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
156 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
157 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000158
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000159 def test_re_split(self):
160 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
161 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
162 self.assertEqual(re.split("(:*)", ":a:b::c"),
163 ['', ':', 'a', ':', 'b', '::', 'c'])
164 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
165 self.assertEqual(re.split("(:)*", ":a:b::c"),
166 ['', ':', 'a', ':', 'b', ':', 'c'])
167 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
168 ['', ':', 'a', ':b::', 'c'])
169 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
170 ['', None, ':', 'a', None, ':', '', 'b', None, '',
171 None, '::', 'c'])
172 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
173 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000174
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000175 def test_qualified_re_split(self):
176 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
177 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
178 self.assertEqual(re.split("(:)", ":a:b::c", 2),
179 ['', ':', 'a', ':', 'b::c'])
180 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
181 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000182
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000183 def test_re_findall(self):
184 self.assertEqual(re.findall(":+", "abc"), [])
185 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
186 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
187 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
188 (":", ":"),
189 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000190
Skip Montanaro5ba00542003-04-25 16:00:14 +0000191 def test_bug_117612(self):
192 self.assertEqual(re.findall(r"(a|(b))", "aba"),
193 [("a", ""),("b", "b"),("a", "")])
194
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000196 self.assertEqual(re.match('a', 'a').groups(), ())
197 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
198 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
199 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
200 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000201
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000202 pat = re.compile('((a)|(b))(c)?')
203 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
204 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
205 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
206 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
207 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000208
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000209 # A single group
210 m = re.match('(a)', 'a')
211 self.assertEqual(m.group(0), 'a')
212 self.assertEqual(m.group(0), 'a')
213 self.assertEqual(m.group(1), 'a')
214 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000215
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000216 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
217 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
218 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
219 (None, 'b', None))
220 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000221
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000222 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000223 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
224 ('(', 'a'))
225 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
226 (None, 'a'))
227 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
228 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
229 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
230 ('a', 'b'))
231 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
232 (None, 'd'))
233 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
234 (None, 'd'))
235 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
236 ('a', ''))
237
238 def test_re_groupref(self):
239 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
240 ('|', 'a'))
241 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
242 (None, 'a'))
243 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
244 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
245 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
246 ('a', 'a'))
247 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
248 (None, None))
249
250 def test_groupdict(self):
251 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
252 'first second').groupdict(),
253 {'first':'first', 'second':'second'})
254
255 def test_expand(self):
256 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
257 "first second")
258 .expand(r"\2 \1 \g<second> \g<first>"),
259 "second first second first")
260
261 def test_repeat_minmax(self):
262 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
263 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
264 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
265 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
266
267 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
268 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
269 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
270 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
271 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
272 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
273 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
274 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
275
276 self.assertEqual(re.match("^x{1}$", "xxx"), None)
277 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
278 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
279 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
280
281 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
282 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
283 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
284 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
285 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
286 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
287 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
288 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
289
290 def test_getattr(self):
291 self.assertEqual(re.match("(a)", "a").pos, 0)
292 self.assertEqual(re.match("(a)", "a").endpos, 1)
293 self.assertEqual(re.match("(a)", "a").string, "a")
294 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
295 self.assertNotEqual(re.match("(a)", "a").re, None)
296
297 def test_special_escapes(self):
298 self.assertEqual(re.search(r"\b(b.)\b",
299 "abcd abc bcd bx").group(1), "bx")
300 self.assertEqual(re.search(r"\B(b.)\B",
301 "abc bcd bc abxd").group(1), "bx")
302 self.assertEqual(re.search(r"\b(b.)\b",
303 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
304 self.assertEqual(re.search(r"\B(b.)\B",
305 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
306 self.assertEqual(re.search(r"\b(b.)\b",
307 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
308 self.assertEqual(re.search(r"\B(b.)\B",
309 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
310 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
311 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
312 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
313 self.assertEqual(re.search(r"\b(b.)\b",
314 u"abcd abc bcd bx").group(1), "bx")
315 self.assertEqual(re.search(r"\B(b.)\B",
316 u"abc bcd bc abxd").group(1), "bx")
317 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
318 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
319 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
320 self.assertEqual(re.search(r"\d\D\w\W\s\S",
321 "1aa! a").group(0), "1aa! a")
322 self.assertEqual(re.search(r"\d\D\w\W\s\S",
323 "1aa! a", re.LOCALE).group(0), "1aa! a")
324 self.assertEqual(re.search(r"\d\D\w\W\s\S",
325 "1aa! a", re.UNICODE).group(0), "1aa! a")
326
327 def test_ignore_case(self):
328 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
329 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
330
331 def test_bigcharset(self):
332 self.assertEqual(re.match(u"([\u2222\u2223])",
333 u"\u2222").group(1), u"\u2222")
334 self.assertEqual(re.match(u"([\u2222\u2223])",
335 u"\u2222", re.UNICODE).group(1), u"\u2222")
336
337 def test_anyall(self):
338 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
339 "a\nb")
340 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
341 "a\n\nb")
342
343 def test_non_consuming(self):
344 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
345 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
346 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
347 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
348 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
349 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
350 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
351
352 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
353 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
354 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
355 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
356
357 def test_ignore_case(self):
358 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
359 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
360 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
361 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
362 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
363 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
364 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
365 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
366
367 def test_category(self):
368 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
369
370 def test_getlower(self):
371 import _sre
372 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
373 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
374 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
375
376 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
377 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
378
379 def test_not_literal(self):
380 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
381 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
382
383 def test_search_coverage(self):
384 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
385 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
386
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000387 def test_re_escape(self):
388 p=""
389 for i in range(0, 256):
390 p = p + chr(i)
391 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
392 True)
393 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000394
Skip Montanaro1e703c62003-04-25 15:40:28 +0000395 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000396 self.assertEqual(pat.match(p) is not None, True)
397 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000398
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000399 def test_pickling(self):
400 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000401 self.pickle_test(pickle)
402 import cPickle
403 self.pickle_test(cPickle)
404
405 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000406 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
407 s = pickle.dumps(oldpat)
408 newpat = pickle.loads(s)
409 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000410
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000411 def test_constants(self):
412 self.assertEqual(re.I, re.IGNORECASE)
413 self.assertEqual(re.L, re.LOCALE)
414 self.assertEqual(re.M, re.MULTILINE)
415 self.assertEqual(re.S, re.DOTALL)
416 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000417
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000418 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000419 for flag in [re.I, re.M, re.X, re.S, re.L]:
420 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000421
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000422 def test_sre_character_literals(self):
423 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
424 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
425 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
426 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
427 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
428 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
429 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
430 self.assertRaises(re.error, re.match, "\911", "")
431
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000432 def test_sre_character_class_literals(self):
433 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
434 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
435 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
436 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
437 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
438 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
439 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
440 self.assertRaises(re.error, re.match, "[\911]", "")
441
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000442 def test_bug_113254(self):
443 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
444 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
445 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
446
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000447 def test_bug_527371(self):
448 # bug described in patches 527371/672491
449 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
450 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
451 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
452 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
453 self.assertEqual(re.match("((a))", "a").lastindex, 1)
454
455 def test_bug_545855(self):
456 # bug 545855 -- This pattern failed to cause a compile error as it
457 # should, instead provoking a TypeError.
458 self.assertRaises(re.error, re.compile, 'foo[a-')
459
460 def test_bug_418626(self):
461 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
462 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
463 # pattern '*?' on a long string.
464 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
465 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
466 20003)
467 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000468 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000469 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000470 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000471
472 def test_bug_612074(self):
473 pat=u"["+re.escape(u"\u2039")+u"]"
474 self.assertEqual(re.compile(pat) and 1, 1)
475
Skip Montanaro1e703c62003-04-25 15:40:28 +0000476 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000477 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000478 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
480 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
481 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000482
483 def test_scanner(self):
484 def s_ident(scanner, token): return token
485 def s_operator(scanner, token): return "op%s" % token
486 def s_float(scanner, token): return float(token)
487 def s_int(scanner, token): return int(token)
488
489 scanner = Scanner([
490 (r"[a-zA-Z_]\w*", s_ident),
491 (r"\d+\.\d*", s_float),
492 (r"\d+", s_int),
493 (r"=|\+|-|\*|/", s_operator),
494 (r"\s+", None),
495 ])
496
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000497 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
498
Skip Montanaro1e703c62003-04-25 15:40:28 +0000499 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
500 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
501 'op+', 'bar'], ''))
502
Skip Montanaro5ba00542003-04-25 16:00:14 +0000503 def test_bug_448951(self):
504 # bug 448951 (similar to 429357, but with single char match)
505 # (Also test greedy matches.)
506 for op in '','?','*':
507 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
508 (None, None))
509 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
510 ('a:', 'a'))
511
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000512 def test_bug_725106(self):
513 # capturing groups in alternatives in repeats
514 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
515 ('b', 'a'))
516 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
517 ('c', 'b'))
518 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
519 ('b', None))
520 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
521 ('b', None))
522 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
523 ('b', 'a'))
524 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
525 ('c', 'b'))
526 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
527 ('b', None))
528 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
529 ('b', None))
530
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000531 def test_bug_725149(self):
532 # mark_stack_base restoring before restoring marks
533 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
534 ('a', None))
535 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
536 ('a', None, None))
537
Just van Rossum12723ba2003-07-02 20:03:04 +0000538 def test_bug_764548(self):
539 # bug 764548, re.compile() barfs on str/unicode subclasses
540 try:
541 unicode
542 except NameError:
543 return # no problem if we have no unicode
544 class my_unicode(unicode): pass
545 pat = re.compile(my_unicode("abc"))
546 self.assertEqual(pat.match("xyz"), None)
547
Skip Montanaro5ba00542003-04-25 16:00:14 +0000548 def test_finditer(self):
549 iter = re.finditer(r":+", "a:b::c:::d")
550 self.assertEqual([item.group(0) for item in iter],
551 [":", "::", ":::"])
552
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000553 def test_bug_926075(self):
554 try:
555 unicode
556 except NameError:
557 return # no problem if we have no unicode
558 self.assert_(re.compile('bug_926075') is not
559 re.compile(eval("u'bug_926075'")))
560
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000561 def test_bug_931848(self):
562 try:
563 unicode
564 except NameError:
565 pass
566 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
567 self.assertEqual(re.compile(pattern).split("a.b.c"),
568 ['a','b','c'])
569
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000570 def test_bug_581080(self):
571 iter = re.finditer(r"\s", "a b")
572 self.assertEqual(iter.next().span(), (1,2))
573 self.assertRaises(StopIteration, iter.next)
574
575 scanner = re.compile(r"\s").scanner("a b")
576 self.assertEqual(scanner.search().span(), (1, 2))
577 self.assertEqual(scanner.search(), None)
578
579 def test_bug_817234(self):
580 iter = re.finditer(r".*", "asdf")
581 self.assertEqual(iter.next().span(), (0, 4))
582 self.assertEqual(iter.next().span(), (4, 4))
583 self.assertRaises(StopIteration, iter.next)
584
585
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000586def run_re_tests():
587 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
588 if verbose:
589 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000590 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000591 # To save time, only run the first and last 10 tests
592 #tests = tests[:10] + tests[-10:]
593 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000594
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000595 for t in tests:
596 sys.stdout.flush()
597 pattern = s = outcome = repl = expected = None
598 if len(t) == 5:
599 pattern, s, outcome, repl, expected = t
600 elif len(t) == 3:
601 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000602 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000603 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
604
Guido van Rossum41360a41998-03-26 19:42:58 +0000605 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000606 obj = re.compile(pattern)
607 except re.error:
608 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000609 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000610 print '=== Syntax error:', t
611 except KeyboardInterrupt: raise KeyboardInterrupt
612 except:
613 print '*** Unexpected error ***', t
614 if verbose:
615 traceback.print_exc(file=sys.stdout)
616 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000617 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000618 result = obj.search(s)
619 except re.error, msg:
620 print '=== Unexpected exception', t, repr(msg)
621 if outcome == SYNTAX_ERROR:
622 # This should have been a syntax error; forget it.
623 pass
624 elif outcome == FAIL:
625 if result is None: pass # No match, as expected
626 else: print '=== Succeeded incorrectly', t
627 elif outcome == SUCCEED:
628 if result is not None:
629 # Matched, as expected, so now we compute the
630 # result string and compare it to our expected result.
631 start, end = result.span(0)
632 vardict={'found': result.group(0),
633 'groups': result.group(),
634 'flags': result.re.flags}
635 for i in range(1, 100):
636 try:
637 gi = result.group(i)
638 # Special hack because else the string concat fails:
639 if gi is None:
640 gi = "None"
641 except IndexError:
642 gi = "Error"
643 vardict['g%d' % i] = gi
644 for i in result.re.groupindex.keys():
645 try:
646 gi = result.group(i)
647 if gi is None:
648 gi = "None"
649 except IndexError:
650 gi = "Error"
651 vardict[i] = gi
652 repl = eval(repl, vardict)
653 if repl != expected:
654 print '=== grouping error', t,
655 print repr(repl) + ' should be ' + repr(expected)
656 else:
657 print '=== Failed incorrectly', t
658
659 # Try the match on a unicode string, and check that it
660 # still succeeds.
661 try:
662 result = obj.search(unicode(s, "latin-1"))
663 if result is None:
664 print '=== Fails on unicode match', t
665 except NameError:
666 continue # 1.5.2
667 except TypeError:
668 continue # unicode test case
669
670 # Try the match on a unicode pattern, and check that it
671 # still succeeds.
672 obj=re.compile(unicode(pattern, "latin-1"))
673 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000674 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000675 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000676
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000677 # Try the match with the search area limited to the extent
678 # of the match and see if it still succeeds. \B will
679 # break (because it won't match at the end or start of a
680 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000681
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000682 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
683 and result is not None:
684 obj = re.compile(pattern)
685 result = obj.search(s, result.start(0), result.end(0) + 1)
686 if result is None:
687 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000688
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000689 # Try the match with IGNORECASE enabled, and check that it
690 # still succeeds.
691 obj = re.compile(pattern, re.IGNORECASE)
692 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000693 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000694 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000695
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000696 # Try the match with LOCALE enabled, and check that it
697 # still succeeds.
698 obj = re.compile(pattern, re.LOCALE)
699 result = obj.search(s)
700 if result is None:
701 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000702
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000703 # Try the match with UNICODE locale enabled, and check
704 # that it still succeeds.
705 obj = re.compile(pattern, re.UNICODE)
706 result = obj.search(s)
707 if result is None:
708 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000709
def test_main():
    """Run the ReTests unittest case, then the table-driven re_tests checks."""
    run_unittest(ReTests)
    run_re_tests()


# Allow running this file directly as a script.
if __name__ == "__main__":
    test_main()