blob: d2e2753122e5c0a8e79401e03d58511e211efd66 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Walter Dörwald21d3a322003-05-01 17:45:56 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Skip Montanaro1e703c62003-04-25 15:40:28 +00006from sre import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
Guido van Rossum23b22571997-07-17 22:36:14 +00009# Misc tests from Tim Peters' re.doc
10
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017class ReTests(unittest.TestCase):
18 def test_search_star_plus(self):
19 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
20 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
21 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
22 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000023 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000024 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
25 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
26 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
27 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000028 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000029
Skip Montanaro8ed06da2003-04-24 19:43:18 +000030 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000031 int_value = int(matchobj.group(0))
32 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000033
Skip Montanaro8ed06da2003-04-24 19:43:18 +000034 def test_basic_re_sub(self):
35 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
36 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
37 '9.3 -3 24x100y')
38 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
39 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000040
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
42 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000043
Skip Montanaro8ed06da2003-04-24 19:43:18 +000044 s = r"\1\1"
45 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
46 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
47 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
50 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
51 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
52 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000053
Skip Montanaro8ed06da2003-04-24 19:43:18 +000054 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
55 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
56 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
57 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
58 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000059
Skip Montanaro8ed06da2003-04-24 19:43:18 +000060 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000061
Skip Montanaro2726fcd2003-04-25 14:31:54 +000062 def test_bug_449964(self):
63 # fails for group followed by other escape
64 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
65 'xx\bxx\b')
66
67 def test_bug_449000(self):
68 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000069 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
70 'abc\ndef\n')
71 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
72 'abc\ndef\n')
73 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
74 'abc\ndef\n')
75 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
76 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000077
Skip Montanaro8ed06da2003-04-24 19:43:18 +000078 def test_qualified_re_sub(self):
79 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
80 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +000081
Skip Montanaro2726fcd2003-04-25 14:31:54 +000082 def test_bug_114660(self):
83 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
84 'hello there')
85
86 def test_bug_462270(self):
87 # Test for empty sub() behaviour, see SF bug #462270
88 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
89 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
90
Skip Montanaro8ed06da2003-04-24 19:43:18 +000091 def test_symbolic_refs(self):
92 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
93 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
94 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
95 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
96 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
97 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
98 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
99 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000100
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000101 def test_re_subn(self):
102 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
103 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
104 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
105 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
106 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000107
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000108 def test_re_split(self):
109 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
110 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
111 self.assertEqual(re.split("(:*)", ":a:b::c"),
112 ['', ':', 'a', ':', 'b', '::', 'c'])
113 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
114 self.assertEqual(re.split("(:)*", ":a:b::c"),
115 ['', ':', 'a', ':', 'b', ':', 'c'])
116 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
117 ['', ':', 'a', ':b::', 'c'])
118 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
119 ['', None, ':', 'a', None, ':', '', 'b', None, '',
120 None, '::', 'c'])
121 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
122 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000123
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000124 def test_qualified_re_split(self):
125 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
126 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
127 self.assertEqual(re.split("(:)", ":a:b::c", 2),
128 ['', ':', 'a', ':', 'b::c'])
129 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
130 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000131
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000132 def test_re_findall(self):
133 self.assertEqual(re.findall(":+", "abc"), [])
134 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
135 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
136 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
137 (":", ":"),
138 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000139
Skip Montanaro5ba00542003-04-25 16:00:14 +0000140 def test_bug_117612(self):
141 self.assertEqual(re.findall(r"(a|(b))", "aba"),
142 [("a", ""),("b", "b"),("a", "")])
143
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000144 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000145 self.assertEqual(re.match('a', 'a').groups(), ())
146 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
147 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
148 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
149 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000150
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000151 pat = re.compile('((a)|(b))(c)?')
152 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
153 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
154 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
155 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
156 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000157
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000158 # A single group
159 m = re.match('(a)', 'a')
160 self.assertEqual(m.group(0), 'a')
161 self.assertEqual(m.group(0), 'a')
162 self.assertEqual(m.group(1), 'a')
163 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000164
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000165 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
166 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
167 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
168 (None, 'b', None))
169 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000170
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000171 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000172 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
173 ('(', 'a'))
174 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
175 (None, 'a'))
176 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
177 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
178 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
179 ('a', 'b'))
180 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
181 (None, 'd'))
182 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
183 (None, 'd'))
184 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
185 ('a', ''))
186
187 def test_re_groupref(self):
188 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
189 ('|', 'a'))
190 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
191 (None, 'a'))
192 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
193 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
194 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
195 ('a', 'a'))
196 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
197 (None, None))
198
199 def test_groupdict(self):
200 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
201 'first second').groupdict(),
202 {'first':'first', 'second':'second'})
203
204 def test_expand(self):
205 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
206 "first second")
207 .expand(r"\2 \1 \g<second> \g<first>"),
208 "second first second first")
209
210 def test_repeat_minmax(self):
211 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
212 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
213 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
214 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
215
216 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
217 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
218 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
219 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
220 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
221 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
222 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
223 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
224
225 self.assertEqual(re.match("^x{1}$", "xxx"), None)
226 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
227 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
228 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
229
230 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
231 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
232 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
233 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
234 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
235 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
236 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
237 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
238
239 def test_getattr(self):
240 self.assertEqual(re.match("(a)", "a").pos, 0)
241 self.assertEqual(re.match("(a)", "a").endpos, 1)
242 self.assertEqual(re.match("(a)", "a").string, "a")
243 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
244 self.assertNotEqual(re.match("(a)", "a").re, None)
245
246 def test_special_escapes(self):
247 self.assertEqual(re.search(r"\b(b.)\b",
248 "abcd abc bcd bx").group(1), "bx")
249 self.assertEqual(re.search(r"\B(b.)\B",
250 "abc bcd bc abxd").group(1), "bx")
251 self.assertEqual(re.search(r"\b(b.)\b",
252 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
253 self.assertEqual(re.search(r"\B(b.)\B",
254 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
255 self.assertEqual(re.search(r"\b(b.)\b",
256 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
257 self.assertEqual(re.search(r"\B(b.)\B",
258 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
259 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
260 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
261 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
262 self.assertEqual(re.search(r"\b(b.)\b",
263 u"abcd abc bcd bx").group(1), "bx")
264 self.assertEqual(re.search(r"\B(b.)\B",
265 u"abc bcd bc abxd").group(1), "bx")
266 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
267 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
268 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
269 self.assertEqual(re.search(r"\d\D\w\W\s\S",
270 "1aa! a").group(0), "1aa! a")
271 self.assertEqual(re.search(r"\d\D\w\W\s\S",
272 "1aa! a", re.LOCALE).group(0), "1aa! a")
273 self.assertEqual(re.search(r"\d\D\w\W\s\S",
274 "1aa! a", re.UNICODE).group(0), "1aa! a")
275
276 def test_ignore_case(self):
277 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
278 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
279
280 def test_bigcharset(self):
281 self.assertEqual(re.match(u"([\u2222\u2223])",
282 u"\u2222").group(1), u"\u2222")
283 self.assertEqual(re.match(u"([\u2222\u2223])",
284 u"\u2222", re.UNICODE).group(1), u"\u2222")
285
286 def test_anyall(self):
287 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
288 "a\nb")
289 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
290 "a\n\nb")
291
292 def test_non_consuming(self):
293 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
294 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
295 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
296 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
297 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
298 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
299 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
300
301 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
302 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
303 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
304 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
305
306 def test_ignore_case(self):
307 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
308 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
309 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
310 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
311 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
312 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
313 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
314 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
315
316 def test_category(self):
317 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
318
319 def test_getlower(self):
320 import _sre
321 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
322 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
323 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
324
325 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
326 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
327
328 def test_not_literal(self):
329 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
330 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
331
332 def test_search_coverage(self):
333 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
334 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
335
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000336 def test_re_escape(self):
337 p=""
338 for i in range(0, 256):
339 p = p + chr(i)
340 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
341 True)
342 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000343
Skip Montanaro1e703c62003-04-25 15:40:28 +0000344 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000345 self.assertEqual(pat.match(p) is not None, True)
346 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000347
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000348 def test_pickling(self):
349 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000350 self.pickle_test(pickle)
351 import cPickle
352 self.pickle_test(cPickle)
353
354 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000355 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
356 s = pickle.dumps(oldpat)
357 newpat = pickle.loads(s)
358 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000359
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000360 def test_constants(self):
361 self.assertEqual(re.I, re.IGNORECASE)
362 self.assertEqual(re.L, re.LOCALE)
363 self.assertEqual(re.M, re.MULTILINE)
364 self.assertEqual(re.S, re.DOTALL)
365 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000366
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000367 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000368 for flag in [re.I, re.M, re.X, re.S, re.L]:
369 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000370
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000371 def test_sre_character_literals(self):
372 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
373 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
374 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
375 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
376 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
377 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
378 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
379 self.assertRaises(re.error, re.match, "\911", "")
380
381 def test_bug_113254(self):
382 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
383 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
384 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
385
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000386 def test_bug_527371(self):
387 # bug described in patches 527371/672491
388 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
389 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
390 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
391 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
392 self.assertEqual(re.match("((a))", "a").lastindex, 1)
393
394 def test_bug_545855(self):
395 # bug 545855 -- This pattern failed to cause a compile error as it
396 # should, instead provoking a TypeError.
397 self.assertRaises(re.error, re.compile, 'foo[a-')
398
399 def test_bug_418626(self):
400 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
401 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
402 # pattern '*?' on a long string.
403 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
404 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
405 20003)
406 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000407 # non-simple '*?' still used to hit the recursion limit, before the
408 # non-recursive scheme was implemented.
409 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000410
411 def test_bug_612074(self):
412 pat=u"["+re.escape(u"\u2039")+u"]"
413 self.assertEqual(re.compile(pat) and 1, 1)
414
Skip Montanaro1e703c62003-04-25 15:40:28 +0000415 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000416 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000417 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000418 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
419 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
420 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000421
422 def test_scanner(self):
423 def s_ident(scanner, token): return token
424 def s_operator(scanner, token): return "op%s" % token
425 def s_float(scanner, token): return float(token)
426 def s_int(scanner, token): return int(token)
427
428 scanner = Scanner([
429 (r"[a-zA-Z_]\w*", s_ident),
430 (r"\d+\.\d*", s_float),
431 (r"\d+", s_int),
432 (r"=|\+|-|\*|/", s_operator),
433 (r"\s+", None),
434 ])
435
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000436 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
437
Skip Montanaro1e703c62003-04-25 15:40:28 +0000438 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
439 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
440 'op+', 'bar'], ''))
441
Skip Montanaro5ba00542003-04-25 16:00:14 +0000442 def test_bug_448951(self):
443 # bug 448951 (similar to 429357, but with single char match)
444 # (Also test greedy matches.)
445 for op in '','?','*':
446 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
447 (None, None))
448 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
449 ('a:', 'a'))
450
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000451 def test_bug_725106(self):
452 # capturing groups in alternatives in repeats
453 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
454 ('b', 'a'))
455 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
456 ('c', 'b'))
457 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
458 ('b', None))
459 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
460 ('b', None))
461 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
462 ('b', 'a'))
463 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
464 ('c', 'b'))
465 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
466 ('b', None))
467 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
468 ('b', None))
469
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000470 def test_bug_725149(self):
471 # mark_stack_base restoring before restoring marks
472 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
473 ('a', None))
474 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
475 ('a', None, None))
476
Just van Rossum12723ba2003-07-02 20:03:04 +0000477 def test_bug_764548(self):
478 # bug 764548, re.compile() barfs on str/unicode subclasses
479 try:
480 unicode
481 except NameError:
482 return # no problem if we have no unicode
483 class my_unicode(unicode): pass
484 pat = re.compile(my_unicode("abc"))
485 self.assertEqual(pat.match("xyz"), None)
486
Skip Montanaro5ba00542003-04-25 16:00:14 +0000487 def test_finditer(self):
488 iter = re.finditer(r":+", "a:b::c:::d")
489 self.assertEqual([item.group(0) for item in iter],
490 [":", "::", ":::"])
491
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000492def run_re_tests():
493 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
494 if verbose:
495 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000496 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000497 # To save time, only run the first and last 10 tests
498 #tests = tests[:10] + tests[-10:]
499 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000500
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000501 for t in tests:
502 sys.stdout.flush()
503 pattern = s = outcome = repl = expected = None
504 if len(t) == 5:
505 pattern, s, outcome, repl, expected = t
506 elif len(t) == 3:
507 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000508 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000509 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
510
Guido van Rossum41360a41998-03-26 19:42:58 +0000511 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000512 obj = re.compile(pattern)
513 except re.error:
514 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000515 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000516 print '=== Syntax error:', t
517 except KeyboardInterrupt: raise KeyboardInterrupt
518 except:
519 print '*** Unexpected error ***', t
520 if verbose:
521 traceback.print_exc(file=sys.stdout)
522 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000523 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000524 result = obj.search(s)
525 except re.error, msg:
526 print '=== Unexpected exception', t, repr(msg)
527 if outcome == SYNTAX_ERROR:
528 # This should have been a syntax error; forget it.
529 pass
530 elif outcome == FAIL:
531 if result is None: pass # No match, as expected
532 else: print '=== Succeeded incorrectly', t
533 elif outcome == SUCCEED:
534 if result is not None:
535 # Matched, as expected, so now we compute the
536 # result string and compare it to our expected result.
537 start, end = result.span(0)
538 vardict={'found': result.group(0),
539 'groups': result.group(),
540 'flags': result.re.flags}
541 for i in range(1, 100):
542 try:
543 gi = result.group(i)
544 # Special hack because else the string concat fails:
545 if gi is None:
546 gi = "None"
547 except IndexError:
548 gi = "Error"
549 vardict['g%d' % i] = gi
550 for i in result.re.groupindex.keys():
551 try:
552 gi = result.group(i)
553 if gi is None:
554 gi = "None"
555 except IndexError:
556 gi = "Error"
557 vardict[i] = gi
558 repl = eval(repl, vardict)
559 if repl != expected:
560 print '=== grouping error', t,
561 print repr(repl) + ' should be ' + repr(expected)
562 else:
563 print '=== Failed incorrectly', t
564
565 # Try the match on a unicode string, and check that it
566 # still succeeds.
567 try:
568 result = obj.search(unicode(s, "latin-1"))
569 if result is None:
570 print '=== Fails on unicode match', t
571 except NameError:
572 continue # 1.5.2
573 except TypeError:
574 continue # unicode test case
575
576 # Try the match on a unicode pattern, and check that it
577 # still succeeds.
578 obj=re.compile(unicode(pattern, "latin-1"))
579 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000580 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000581 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000582
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000583 # Try the match with the search area limited to the extent
584 # of the match and see if it still succeeds. \B will
585 # break (because it won't match at the end or start of a
586 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000587
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000588 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
589 and result is not None:
590 obj = re.compile(pattern)
591 result = obj.search(s, result.start(0), result.end(0) + 1)
592 if result is None:
593 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000594
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000595 # Try the match with IGNORECASE enabled, and check that it
596 # still succeeds.
597 obj = re.compile(pattern, re.IGNORECASE)
598 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000599 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000600 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000601
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000602 # Try the match with LOCALE enabled, and check that it
603 # still succeeds.
604 obj = re.compile(pattern, re.LOCALE)
605 result = obj.search(s)
606 if result is None:
607 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000608
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000609 # Try the match with UNICODE locale enabled, and check
610 # that it still succeeds.
611 obj = re.compile(pattern, re.UNICODE)
612 result = obj.search(s)
613 if result is None:
614 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000615
def test_main():
    """Run the ReTests unit tests, then the table-driven re_tests checks."""
    run_unittest(ReTests)
    run_re_tests()


if __name__ == "__main__":
    test_main()