blob: 2363ce5ee6e3e0998e8ce7accfee5d8b0a3bb6f4 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Walter Dörwald21d3a322003-05-01 17:45:56 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Skip Montanaro1e703c62003-04-25 15:40:28 +00006from sre import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
class ReTests(unittest.TestCase):
    """Unit tests for the ``re`` module.

    The methods exercise searching, matching, substitution, splitting,
    group access and flag handling; the ``test_bug_NNNNNN`` methods are
    regression tests for individual bug reports.

    Test methods are documented with ``#`` comments rather than docstrings
    so that unittest's verbose output keeps showing the method names.
    """

    def test_search_star_plus(self):
        # 'x*' can match the empty string at position 0, 'x+' cannot.
        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
        self.assertEqual(re.search('x', 'aaa'), None)
        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
        self.assertEqual(re.match('a+', 'xxx'), None)

    def bump_num(self, matchobj):
        # Replacement callback: return the matched integer plus one, as text.
        int_value = int(matchobj.group(0))
        return str(int_value + 1)

    def test_basic_re_sub(self):
        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                         '9.3 -3 23x99y')

        # A callable's return value is used verbatim; a template string
        # has its escapes processed.
        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        # Named and numbered \g<...> references are interchangeable.
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

        self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')

    def test_bug_449964(self):
        # fails for group followed by other escape
        self.assertEqual(re.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),
                         'xx\bxx\b')

    def test_bug_449000(self):
        # Test for sub() on escaped characters
        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')

    def test_qualified_re_sub(self):
        # The optional count argument limits the number of replacements.
        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')

    def test_bug_114660(self):
        # The input contains a run of two spaces so that '\s+' really has
        # something to collapse into the single space of the replacement.
        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
                         'hello there')

    def test_bug_462270(self):
        # Test for empty sub() behaviour, see SF bug #462270
        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')

    def test_symbolic_refs(self):
        # Malformed, unknown or unmatched group references in the
        # replacement template must raise instead of being ignored.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a a>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<1a1>', 'xx')
        self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')

    def test_re_subn(self):
        # subn() returns (new_string, number_of_substitutions).
        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))

    def test_re_split(self):
        # Capturing groups in the separator are included in the result;
        # non-capturing groups are not.
        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
        self.assertEqual(re.split("(:*)", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', '::', 'c'])
        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
        self.assertEqual(re.split("(:)*", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])

    def test_qualified_re_split(self):
        # The optional maxsplit argument limits the number of splits.
        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
        self.assertEqual(re.split("(:)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])

    def test_re_findall(self):
        # One group -> list of strings; several groups -> list of tuples.
        self.assertEqual(re.findall(":+", "abc"), [])
        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
                                                               (":", ":"),
                                                               (":", "::")])

    def test_bug_117612(self):
        self.assertEqual(re.findall(r"(a|(b))", "aba"),
                         [("a", ""),("b", "b"),("a", "")])

    def test_re_match(self):
        self.assertEqual(re.match('a', 'a').groups(), ())
        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))

        pat = re.compile('((a)|(b))(c)?')
        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

        # A single group
        m = re.match('(a)', 'a')
        # group() with no argument and group(0) are both checked.
        self.assertEqual(m.group(), 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(1), 'a')
        self.assertEqual(m.group(1, 1), ('a', 'a'))

        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                         (None, 'b', None))
        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))

    def test_re_groupref_exists(self):
        # Conditional patterns: (?(1)yes|no) depends on whether group 1
        # took part in the match.
        self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'), None)
        self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a'), None)
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

    def test_re_groupref(self):
        # Backreferences to groups that did or did not participate.
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
                         ('|', 'a'))
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
                         (None, 'a'))
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
                         ('a', 'a'))
        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
                         (None, None))

    def test_groupdict(self):
        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
                                  'first second').groupdict(),
                         {'first':'first', 'second':'second'})

    def test_expand(self):
        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
                                  "first second")
                                  .expand(r"\2 \1 \g<second> \g<first>"),
                         "second first second first")

    def test_repeat_minmax(self):
        # Repeat counts too small for the anchored input must not match.
        self.assertEqual(re.match(r"^(\w){1}$", "abc"), None)
        self.assertEqual(re.match(r"^(\w){1}?$", "abc"), None)
        self.assertEqual(re.match(r"^(\w){1,2}$", "abc"), None)
        self.assertEqual(re.match(r"^(\w){1,2}?$", "abc"), None)

        # Greedy forms first, then the matching lazy forms; a repeated
        # group keeps its last iteration.
        self.assertEqual(re.match(r"^(\w){3}$", "abc").group(1), "c")
        self.assertEqual(re.match(r"^(\w){1,3}$", "abc").group(1), "c")
        self.assertEqual(re.match(r"^(\w){1,4}$", "abc").group(1), "c")
        self.assertEqual(re.match(r"^(\w){3,4}$", "abc").group(1), "c")
        self.assertEqual(re.match(r"^(\w){3}?$", "abc").group(1), "c")
        self.assertEqual(re.match(r"^(\w){1,3}?$", "abc").group(1), "c")
        self.assertEqual(re.match(r"^(\w){1,4}?$", "abc").group(1), "c")
        self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c")

        self.assertEqual(re.match("^x{1}$", "xxx"), None)
        self.assertEqual(re.match("^x{1}?$", "xxx"), None)
        self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
        self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)

        self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
        self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
        self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
        self.assertNotEqual(re.match("^x{3,4}$", "xxx"), None)
        self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
        self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
        self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)

    def test_getattr(self):
        # Attributes exposed by match objects.
        self.assertEqual(re.match("(a)", "a").pos, 0)
        self.assertEqual(re.match("(a)", "a").endpos, 1)
        self.assertEqual(re.match("(a)", "a").string, "a")
        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
        self.assertNotEqual(re.match("(a)", "a").re, None)

    def test_special_escapes(self):
        # \b, \B, \A, \Z and the character-class escapes, with no flag,
        # with LOCALE, with UNICODE and on unicode subject strings.
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
        self.assertEqual(re.search(r"\b(b.)\b",
                                   u"abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   u"abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a").group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.UNICODE).group(0), "1aa! a")

    def test_ignore_case(self):
        # Plain literals against str and unicode subjects.
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
        # Character classes, backreferences and alternation under re.I.
        # Everything lives in this one method so that no second
        # definition of the same name can shadow part of the checks.
        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")

    def test_bigcharset(self):
        self.assertEqual(re.match(u"([\u2222\u2223])",
                                  u"\u2222").group(1), u"\u2222")
        self.assertEqual(re.match(u"([\u2222\u2223])",
                                  u"\u2222", re.UNICODE).group(1), u"\u2222")

    def test_anyall(self):
        # With DOTALL, '.' also matches newlines.
        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                         "a\nb")
        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
                         "a\n\nb")

    def test_non_consuming(self):
        # Positive lookahead assertions.
        self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a")
        self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a")
        self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a")
        self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")

        # Negative lookahead assertions.
        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")

    def test_category(self):
        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")

    def test_getlower(self):
        import _sre
        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")

    def test_not_literal(self):
        self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
        self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")

    def test_search_coverage(self):
        self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
        self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")

    def test_re_escape(self):
        # Every 8-bit character, escaped, must match itself -- one at a
        # time and as one 256-character string.
        p=""
        for i in range(0, 256):
            p = p + chr(i)
            self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
                             True)
            self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))

        pat=re.compile(re.escape(p))
        self.assertEqual(pat.match(p) is not None, True)
        self.assertEqual(pat.match(p).span(), (0,256))

    def test_pickling(self):
        # Run the same round-trip check with both pickle implementations.
        import pickle
        self.pickle_test(pickle)
        import cPickle
        self.pickle_test(cPickle)

    def pickle_test(self, pickle):
        # Helper: a compiled pattern must survive dumps()/loads() of the
        # given pickle-like module and compare equal afterwards.
        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
        s = pickle.dumps(oldpat)
        newpat = pickle.loads(s)
        self.assertEqual(oldpat, newpat)

    def test_constants(self):
        # Short flag names are aliases of the long ones.
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE)

    def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertNotEqual(re.compile('^pattern$', flag), None)

    def test_sre_character_literals(self):
        # Octal and hex escapes, optionally followed by another character.
        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
        self.assertRaises(re.error, re.match, r"\911", "")

    def test_bug_113254(self):
        # A group that did not participate reports -1 positions.
        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))

    def test_bug_527371(self):
        # bug described in patches 527371/672491
        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
        self.assertEqual(re.match("((a))", "a").lastindex, 1)

    def test_bug_545855(self):
        # bug 545855 -- This pattern failed to cause a compile error as it
        # should, instead provoking a TypeError.
        self.assertRaises(re.error, re.compile, 'foo[a-')

    def test_bug_418626(self):
        # bugs 418626 et al. -- Testing Greg Chapman's addition of op code
        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
        # pattern '*?' on a long string.
        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
                         20003)
        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
        # non-simple '*?' still used to hit the recursion limit, before the
        # non-recursive scheme was implemented.
        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)

    def test_bug_612074(self):
        pat=u"["+re.escape(u"\u2039")+u"]"
        self.assertEqual(re.compile(pat) and 1, 1)

    def test_stack_overflow(self):
        # nasty cases that used to overflow the straightforward recursive
        # implementation of repeated groups.
        self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')

    def test_scanner(self):
        # Token callbacks receive (scanner, matched_text).
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], ''))

    def test_bug_448951(self):
        # bug 448951 (similar to 429357, but with single char match)
        # (Also test greedy matches.)
        for op in '','?','*':
            self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
                             (None, None))
            self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
                             ('a:', 'a'))

    def test_bug_725106(self):
        # capturing groups in alternatives in repeats
        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
                         ('b', 'a'))
        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
                         ('c', 'b'))
        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
                         ('b', None))
        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
                         ('b', None))
        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
                         ('b', 'a'))
        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
                         ('c', 'b'))
        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
                         ('b', None))
        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
                         ('b', None))

    def test_bug_725149(self):
        # mark_stack_base restoring before restoring marks
        self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
                         ('a', None))
        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
                         ('a', None, None))

    def test_bug_764548(self):
        # bug 764548, re.compile() barfs on str/unicode subclasses
        try:
            unicode
        except NameError:
            return  # no problem if we have no unicode
        class my_unicode(unicode): pass
        pat = re.compile(my_unicode("abc"))
        self.assertEqual(pat.match("xyz"), None)

    def test_finditer(self):
        matches = re.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item.group(0) for item in matches],
                         [":", "::", ":::"])

    def test_bug_926075(self):
        # A str pattern and the equal unicode pattern must not compile to
        # the very same pattern object.
        try:
            unicode
        except NameError:
            return # no problem if we have no unicode
        self.assert_(re.compile('bug_926075') is not
                     re.compile(eval("u'bug_926075'")))

    def test_bug_931848(self):
        try:
            unicode
        except NameError:
            # Without unicode support the literal below cannot even be
            # evaluated, so skip exactly like the other unicode tests do.
            return
        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
        self.assertEqual(re.compile(pattern).split("a.b.c"),
                         ['a','b','c'])
508
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000509def run_re_tests():
510 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
511 if verbose:
512 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000513 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000514 # To save time, only run the first and last 10 tests
515 #tests = tests[:10] + tests[-10:]
516 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000517
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000518 for t in tests:
519 sys.stdout.flush()
520 pattern = s = outcome = repl = expected = None
521 if len(t) == 5:
522 pattern, s, outcome, repl, expected = t
523 elif len(t) == 3:
524 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000525 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000526 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
527
Guido van Rossum41360a41998-03-26 19:42:58 +0000528 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000529 obj = re.compile(pattern)
530 except re.error:
531 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000532 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000533 print '=== Syntax error:', t
534 except KeyboardInterrupt: raise KeyboardInterrupt
535 except:
536 print '*** Unexpected error ***', t
537 if verbose:
538 traceback.print_exc(file=sys.stdout)
539 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000540 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000541 result = obj.search(s)
542 except re.error, msg:
543 print '=== Unexpected exception', t, repr(msg)
544 if outcome == SYNTAX_ERROR:
545 # This should have been a syntax error; forget it.
546 pass
547 elif outcome == FAIL:
548 if result is None: pass # No match, as expected
549 else: print '=== Succeeded incorrectly', t
550 elif outcome == SUCCEED:
551 if result is not None:
552 # Matched, as expected, so now we compute the
553 # result string and compare it to our expected result.
554 start, end = result.span(0)
555 vardict={'found': result.group(0),
556 'groups': result.group(),
557 'flags': result.re.flags}
558 for i in range(1, 100):
559 try:
560 gi = result.group(i)
561 # Special hack because else the string concat fails:
562 if gi is None:
563 gi = "None"
564 except IndexError:
565 gi = "Error"
566 vardict['g%d' % i] = gi
567 for i in result.re.groupindex.keys():
568 try:
569 gi = result.group(i)
570 if gi is None:
571 gi = "None"
572 except IndexError:
573 gi = "Error"
574 vardict[i] = gi
575 repl = eval(repl, vardict)
576 if repl != expected:
577 print '=== grouping error', t,
578 print repr(repl) + ' should be ' + repr(expected)
579 else:
580 print '=== Failed incorrectly', t
581
582 # Try the match on a unicode string, and check that it
583 # still succeeds.
584 try:
585 result = obj.search(unicode(s, "latin-1"))
586 if result is None:
587 print '=== Fails on unicode match', t
588 except NameError:
589 continue # 1.5.2
590 except TypeError:
591 continue # unicode test case
592
593 # Try the match on a unicode pattern, and check that it
594 # still succeeds.
595 obj=re.compile(unicode(pattern, "latin-1"))
596 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000597 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000598 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000599
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000600 # Try the match with the search area limited to the extent
601 # of the match and see if it still succeeds. \B will
602 # break (because it won't match at the end or start of a
603 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000604
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000605 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
606 and result is not None:
607 obj = re.compile(pattern)
608 result = obj.search(s, result.start(0), result.end(0) + 1)
609 if result is None:
610 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000611
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000612 # Try the match with IGNORECASE enabled, and check that it
613 # still succeeds.
614 obj = re.compile(pattern, re.IGNORECASE)
615 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000616 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000617 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000618
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000619 # Try the match with LOCALE enabled, and check that it
620 # still succeeds.
621 obj = re.compile(pattern, re.LOCALE)
622 result = obj.search(s)
623 if result is None:
624 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000625
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000626 # Try the match with UNICODE locale enabled, and check
627 # that it still succeeds.
628 obj = re.compile(pattern, re.UNICODE)
629 result = obj.search(s)
630 if result is None:
631 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000632
def test_main():
    """Run the ReTests unit tests, then the table-driven re_tests checks."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000636
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()