blob: 1847c64e2e0a5970d79d9f57de2df3614f7f4ed0 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Walter Dörwald21d3a322003-05-01 17:45:56 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Skip Montanaro1e703c62003-04-25 15:40:28 +00006from sre import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
Guido van Rossum23b22571997-07-17 22:36:14 +00009# Misc tests from Tim Peters' re.doc
10
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017class ReTests(unittest.TestCase):
18 def test_search_star_plus(self):
19 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
20 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
21 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
22 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000023 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000024 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
25 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
26 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
27 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000028 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000029
Skip Montanaro8ed06da2003-04-24 19:43:18 +000030 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000031 int_value = int(matchobj.group(0))
32 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000033
Skip Montanaro8ed06da2003-04-24 19:43:18 +000034 def test_basic_re_sub(self):
35 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
36 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
37 '9.3 -3 24x100y')
38 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
39 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000040
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
42 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000043
Skip Montanaro8ed06da2003-04-24 19:43:18 +000044 s = r"\1\1"
45 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
46 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
47 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
50 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
51 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
52 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000053
Skip Montanaro8ed06da2003-04-24 19:43:18 +000054 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
55 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
56 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
57 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
58 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000059
Skip Montanaro8ed06da2003-04-24 19:43:18 +000060 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000061
Skip Montanaro2726fcd2003-04-25 14:31:54 +000062 def test_bug_449964(self):
63 # fails for group followed by other escape
64 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
65 'xx\bxx\b')
66
67 def test_bug_449000(self):
68 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000069 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
70 'abc\ndef\n')
71 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
72 'abc\ndef\n')
73 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
74 'abc\ndef\n')
75 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
76 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000077
Skip Montanaro8ed06da2003-04-24 19:43:18 +000078 def test_qualified_re_sub(self):
79 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
80 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +000081
Skip Montanaro2726fcd2003-04-25 14:31:54 +000082 def test_bug_114660(self):
83 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
84 'hello there')
85
86 def test_bug_462270(self):
87 # Test for empty sub() behaviour, see SF bug #462270
88 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
89 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
90
Skip Montanaro8ed06da2003-04-24 19:43:18 +000091 def test_symbolic_refs(self):
92 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
93 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
94 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
95 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
96 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
97 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
98 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
99 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000100
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000101 def test_re_subn(self):
102 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
103 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
104 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
105 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
106 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000107
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000108 def test_re_split(self):
109 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
110 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
111 self.assertEqual(re.split("(:*)", ":a:b::c"),
112 ['', ':', 'a', ':', 'b', '::', 'c'])
113 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
114 self.assertEqual(re.split("(:)*", ":a:b::c"),
115 ['', ':', 'a', ':', 'b', ':', 'c'])
116 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
117 ['', ':', 'a', ':b::', 'c'])
118 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
119 ['', None, ':', 'a', None, ':', '', 'b', None, '',
120 None, '::', 'c'])
121 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
122 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000123
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000124 def test_qualified_re_split(self):
125 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
126 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
127 self.assertEqual(re.split("(:)", ":a:b::c", 2),
128 ['', ':', 'a', ':', 'b::c'])
129 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
130 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000131
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000132 def test_re_findall(self):
133 self.assertEqual(re.findall(":+", "abc"), [])
134 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
135 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
136 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
137 (":", ":"),
138 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000139
Skip Montanaro5ba00542003-04-25 16:00:14 +0000140 def test_bug_117612(self):
141 self.assertEqual(re.findall(r"(a|(b))", "aba"),
142 [("a", ""),("b", "b"),("a", "")])
143
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000144 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000145 self.assertEqual(re.match('a', 'a').groups(), ())
146 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
147 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
148 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
149 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000150
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000151 pat = re.compile('((a)|(b))(c)?')
152 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
153 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
154 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
155 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
156 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000157
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000158 # A single group
159 m = re.match('(a)', 'a')
160 self.assertEqual(m.group(0), 'a')
161 self.assertEqual(m.group(0), 'a')
162 self.assertEqual(m.group(1), 'a')
163 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000164
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000165 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
166 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
167 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
168 (None, 'b', None))
169 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000170
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000171 def test_re_groupref_exists(self):
172 return # not yet
173 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
174 ('(', 'a'))
175 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
176 (None, 'a'))
177 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
178 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
179 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
180 ('a', 'b'))
181 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
182 (None, 'd'))
183 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
184 (None, 'd'))
185 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
186 ('a', ''))
187
188 def test_re_groupref(self):
189 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
190 ('|', 'a'))
191 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
192 (None, 'a'))
193 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
194 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
195 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
196 ('a', 'a'))
197 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
198 (None, None))
199
200 def test_groupdict(self):
201 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
202 'first second').groupdict(),
203 {'first':'first', 'second':'second'})
204
205 def test_expand(self):
206 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
207 "first second")
208 .expand(r"\2 \1 \g<second> \g<first>"),
209 "second first second first")
210
211 def test_repeat_minmax(self):
212 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
213 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
214 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
215 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
216
217 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
218 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
219 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
220 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
221 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
222 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
223 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
224 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
225
226 self.assertEqual(re.match("^x{1}$", "xxx"), None)
227 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
228 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
229 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
230
231 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
232 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
233 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
234 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
235 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
236 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
237 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
238 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
239
240 def test_getattr(self):
241 self.assertEqual(re.match("(a)", "a").pos, 0)
242 self.assertEqual(re.match("(a)", "a").endpos, 1)
243 self.assertEqual(re.match("(a)", "a").string, "a")
244 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
245 self.assertNotEqual(re.match("(a)", "a").re, None)
246
247 def test_special_escapes(self):
248 self.assertEqual(re.search(r"\b(b.)\b",
249 "abcd abc bcd bx").group(1), "bx")
250 self.assertEqual(re.search(r"\B(b.)\B",
251 "abc bcd bc abxd").group(1), "bx")
252 self.assertEqual(re.search(r"\b(b.)\b",
253 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
254 self.assertEqual(re.search(r"\B(b.)\B",
255 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
256 self.assertEqual(re.search(r"\b(b.)\b",
257 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
258 self.assertEqual(re.search(r"\B(b.)\B",
259 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
260 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
261 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
262 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
263 self.assertEqual(re.search(r"\b(b.)\b",
264 u"abcd abc bcd bx").group(1), "bx")
265 self.assertEqual(re.search(r"\B(b.)\B",
266 u"abc bcd bc abxd").group(1), "bx")
267 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
268 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
269 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
270 self.assertEqual(re.search(r"\d\D\w\W\s\S",
271 "1aa! a").group(0), "1aa! a")
272 self.assertEqual(re.search(r"\d\D\w\W\s\S",
273 "1aa! a", re.LOCALE).group(0), "1aa! a")
274 self.assertEqual(re.search(r"\d\D\w\W\s\S",
275 "1aa! a", re.UNICODE).group(0), "1aa! a")
276
277 def test_ignore_case(self):
278 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
279 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
280
281 def test_bigcharset(self):
282 self.assertEqual(re.match(u"([\u2222\u2223])",
283 u"\u2222").group(1), u"\u2222")
284 self.assertEqual(re.match(u"([\u2222\u2223])",
285 u"\u2222", re.UNICODE).group(1), u"\u2222")
286
287 def test_anyall(self):
288 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
289 "a\nb")
290 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
291 "a\n\nb")
292
293 def test_non_consuming(self):
294 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
295 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
296 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
297 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
298 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
299 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
300 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
301
302 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
303 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
304 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
305 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
306
307 def test_ignore_case(self):
308 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
309 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
310 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
311 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
312 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
313 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
314 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
315 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
316
317 def test_category(self):
318 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
319
320 def test_getlower(self):
321 import _sre
322 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
323 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
324 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
325
326 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
327 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
328
329 def test_not_literal(self):
330 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
331 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
332
333 def test_search_coverage(self):
334 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
335 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
336
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000337 def test_re_escape(self):
338 p=""
339 for i in range(0, 256):
340 p = p + chr(i)
341 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
342 True)
343 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000344
Skip Montanaro1e703c62003-04-25 15:40:28 +0000345 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000346 self.assertEqual(pat.match(p) is not None, True)
347 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000348
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000349 def test_pickling(self):
350 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000351 self.pickle_test(pickle)
352 import cPickle
353 self.pickle_test(cPickle)
354
355 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000356 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
357 s = pickle.dumps(oldpat)
358 newpat = pickle.loads(s)
359 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000360
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000361 def test_constants(self):
362 self.assertEqual(re.I, re.IGNORECASE)
363 self.assertEqual(re.L, re.LOCALE)
364 self.assertEqual(re.M, re.MULTILINE)
365 self.assertEqual(re.S, re.DOTALL)
366 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000367
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000368 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000369 for flag in [re.I, re.M, re.X, re.S, re.L]:
370 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000371
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000372 def test_sre_character_literals(self):
373 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
374 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
375 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
376 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
377 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
378 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
379 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
380 self.assertRaises(re.error, re.match, "\911", "")
381
382 def test_bug_113254(self):
383 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
384 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
385 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
386
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000387 def test_bug_527371(self):
388 # bug described in patches 527371/672491
389 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
390 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
391 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
392 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
393 self.assertEqual(re.match("((a))", "a").lastindex, 1)
394
395 def test_bug_545855(self):
396 # bug 545855 -- This pattern failed to cause a compile error as it
397 # should, instead provoking a TypeError.
398 self.assertRaises(re.error, re.compile, 'foo[a-')
399
400 def test_bug_418626(self):
401 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
402 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
403 # pattern '*?' on a long string.
404 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
405 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
406 20003)
407 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
408 # non-simple '*?' still recurses and hits the recursion limit
409 self.assertRaises(RuntimeError, re.search, '(a|b)*?c', 10000*'ab'+'cd')
410
411 def test_bug_612074(self):
412 pat=u"["+re.escape(u"\u2039")+u"]"
413 self.assertEqual(re.compile(pat) and 1, 1)
414
Skip Montanaro1e703c62003-04-25 15:40:28 +0000415 def test_stack_overflow(self):
416 # nasty case that overflows the straightforward recursive
417 # implementation of repeated groups.
418 self.assertRaises(RuntimeError, re.match, '(x)*', 50000*'x')
419 self.assertRaises(RuntimeError, re.match, '(x)*y', 50000*'x'+'y')
420 self.assertRaises(RuntimeError, re.match, '(x)*?y', 50000*'x'+'y')
421
422 def test_scanner(self):
423 def s_ident(scanner, token): return token
424 def s_operator(scanner, token): return "op%s" % token
425 def s_float(scanner, token): return float(token)
426 def s_int(scanner, token): return int(token)
427
428 scanner = Scanner([
429 (r"[a-zA-Z_]\w*", s_ident),
430 (r"\d+\.\d*", s_float),
431 (r"\d+", s_int),
432 (r"=|\+|-|\*|/", s_operator),
433 (r"\s+", None),
434 ])
435
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000436 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
437
Skip Montanaro1e703c62003-04-25 15:40:28 +0000438 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
439 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
440 'op+', 'bar'], ''))
441
Skip Montanaro5ba00542003-04-25 16:00:14 +0000442 def test_bug_448951(self):
443 # bug 448951 (similar to 429357, but with single char match)
444 # (Also test greedy matches.)
445 for op in '','?','*':
446 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
447 (None, None))
448 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
449 ('a:', 'a'))
450
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000451 def test_bug_725106(self):
452 # capturing groups in alternatives in repeats
453 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
454 ('b', 'a'))
455 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
456 ('c', 'b'))
457 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
458 ('b', None))
459 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
460 ('b', None))
461 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
462 ('b', 'a'))
463 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
464 ('c', 'b'))
465 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
466 ('b', None))
467 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
468 ('b', None))
469
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000470 def test_bug_725149(self):
471 # mark_stack_base restoring before restoring marks
472 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
473 ('a', None))
474 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
475 ('a', None, None))
476
Skip Montanaro5ba00542003-04-25 16:00:14 +0000477 def test_finditer(self):
478 iter = re.finditer(r":+", "a:b::c:::d")
479 self.assertEqual([item.group(0) for item in iter],
480 [":", "::", ":::"])
481
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000482def run_re_tests():
483 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
484 if verbose:
485 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000486 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000487 # To save time, only run the first and last 10 tests
488 #tests = tests[:10] + tests[-10:]
489 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000490
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000491 for t in tests:
492 sys.stdout.flush()
493 pattern = s = outcome = repl = expected = None
494 if len(t) == 5:
495 pattern, s, outcome, repl, expected = t
496 elif len(t) == 3:
497 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000498 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000499 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
500
Guido van Rossum41360a41998-03-26 19:42:58 +0000501 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000502 obj = re.compile(pattern)
503 except re.error:
504 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000505 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000506 print '=== Syntax error:', t
507 except KeyboardInterrupt: raise KeyboardInterrupt
508 except:
509 print '*** Unexpected error ***', t
510 if verbose:
511 traceback.print_exc(file=sys.stdout)
512 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000513 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000514 result = obj.search(s)
515 except re.error, msg:
516 print '=== Unexpected exception', t, repr(msg)
517 if outcome == SYNTAX_ERROR:
518 # This should have been a syntax error; forget it.
519 pass
520 elif outcome == FAIL:
521 if result is None: pass # No match, as expected
522 else: print '=== Succeeded incorrectly', t
523 elif outcome == SUCCEED:
524 if result is not None:
525 # Matched, as expected, so now we compute the
526 # result string and compare it to our expected result.
527 start, end = result.span(0)
528 vardict={'found': result.group(0),
529 'groups': result.group(),
530 'flags': result.re.flags}
531 for i in range(1, 100):
532 try:
533 gi = result.group(i)
534 # Special hack because else the string concat fails:
535 if gi is None:
536 gi = "None"
537 except IndexError:
538 gi = "Error"
539 vardict['g%d' % i] = gi
540 for i in result.re.groupindex.keys():
541 try:
542 gi = result.group(i)
543 if gi is None:
544 gi = "None"
545 except IndexError:
546 gi = "Error"
547 vardict[i] = gi
548 repl = eval(repl, vardict)
549 if repl != expected:
550 print '=== grouping error', t,
551 print repr(repl) + ' should be ' + repr(expected)
552 else:
553 print '=== Failed incorrectly', t
554
555 # Try the match on a unicode string, and check that it
556 # still succeeds.
557 try:
558 result = obj.search(unicode(s, "latin-1"))
559 if result is None:
560 print '=== Fails on unicode match', t
561 except NameError:
562 continue # 1.5.2
563 except TypeError:
564 continue # unicode test case
565
566 # Try the match on a unicode pattern, and check that it
567 # still succeeds.
568 obj=re.compile(unicode(pattern, "latin-1"))
569 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000570 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000571 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000572
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000573 # Try the match with the search area limited to the extent
574 # of the match and see if it still succeeds. \B will
575 # break (because it won't match at the end or start of a
576 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000577
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000578 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
579 and result is not None:
580 obj = re.compile(pattern)
581 result = obj.search(s, result.start(0), result.end(0) + 1)
582 if result is None:
583 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000584
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000585 # Try the match with IGNORECASE enabled, and check that it
586 # still succeeds.
587 obj = re.compile(pattern, re.IGNORECASE)
588 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000589 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000590 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000591
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000592 # Try the match with LOCALE enabled, and check that it
593 # still succeeds.
594 obj = re.compile(pattern, re.LOCALE)
595 result = obj.search(s)
596 if result is None:
597 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000598
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000599 # Try the match with UNICODE locale enabled, and check
600 # that it still succeeds.
601 obj = re.compile(pattern, re.UNICODE)
602 result = obj.search(s)
603 if result is None:
604 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000605
def test_main():
    """Run the ReTests unittest suite, then the table-driven re_tests."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000609
# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_main()