blob: c7afdc59a2a036ab748d2c0e4762ccba708ca402 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Walter Dörwald21d3a322003-05-01 17:45:56 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Skip Montanaro1e703c62003-04-25 15:40:28 +00006from sre import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
Skip Montanaro8ed06da2003-04-24 19:43:18 +000038 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000039 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Skip Montanaro8ed06da2003-04-24 19:43:18 +000086 def test_qualified_re_sub(self):
87 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
88 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +000089
Skip Montanaro2726fcd2003-04-25 14:31:54 +000090 def test_bug_114660(self):
91 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
92 'hello there')
93
94 def test_bug_462270(self):
95 # Test for empty sub() behaviour, see SF bug #462270
96 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
97 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
98
Skip Montanaro8ed06da2003-04-24 19:43:18 +000099 def test_symbolic_refs(self):
100 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
101 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
102 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
103 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
104 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
105 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
106 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
107 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000108
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000109 def test_re_subn(self):
110 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
111 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
112 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
113 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
114 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000115
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000116 def test_re_split(self):
117 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
118 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
119 self.assertEqual(re.split("(:*)", ":a:b::c"),
120 ['', ':', 'a', ':', 'b', '::', 'c'])
121 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
122 self.assertEqual(re.split("(:)*", ":a:b::c"),
123 ['', ':', 'a', ':', 'b', ':', 'c'])
124 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
125 ['', ':', 'a', ':b::', 'c'])
126 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
127 ['', None, ':', 'a', None, ':', '', 'b', None, '',
128 None, '::', 'c'])
129 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
130 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000131
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000132 def test_qualified_re_split(self):
133 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
134 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
135 self.assertEqual(re.split("(:)", ":a:b::c", 2),
136 ['', ':', 'a', ':', 'b::c'])
137 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
138 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000139
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000140 def test_re_findall(self):
141 self.assertEqual(re.findall(":+", "abc"), [])
142 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
143 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
144 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
145 (":", ":"),
146 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000147
Skip Montanaro5ba00542003-04-25 16:00:14 +0000148 def test_bug_117612(self):
149 self.assertEqual(re.findall(r"(a|(b))", "aba"),
150 [("a", ""),("b", "b"),("a", "")])
151
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000152 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000153 self.assertEqual(re.match('a', 'a').groups(), ())
154 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
155 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
156 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
157 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000158
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000159 pat = re.compile('((a)|(b))(c)?')
160 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
161 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
162 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
163 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
164 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000165
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000166 # A single group
167 m = re.match('(a)', 'a')
168 self.assertEqual(m.group(0), 'a')
169 self.assertEqual(m.group(0), 'a')
170 self.assertEqual(m.group(1), 'a')
171 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000172
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000173 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
174 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
175 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
176 (None, 'b', None))
177 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000178
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000179 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000180 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
181 ('(', 'a'))
182 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
183 (None, 'a'))
184 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
185 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
186 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
187 ('a', 'b'))
188 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
189 (None, 'd'))
190 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
191 (None, 'd'))
192 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
193 ('a', ''))
194
195 def test_re_groupref(self):
196 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
197 ('|', 'a'))
198 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
199 (None, 'a'))
200 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
201 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
202 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
203 ('a', 'a'))
204 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
205 (None, None))
206
207 def test_groupdict(self):
208 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
209 'first second').groupdict(),
210 {'first':'first', 'second':'second'})
211
212 def test_expand(self):
213 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
214 "first second")
215 .expand(r"\2 \1 \g<second> \g<first>"),
216 "second first second first")
217
218 def test_repeat_minmax(self):
219 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
220 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
221 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
222 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
223
224 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
225 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
226 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
227 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
228 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
229 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
230 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
231 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
232
233 self.assertEqual(re.match("^x{1}$", "xxx"), None)
234 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
235 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
236 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
237
238 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
239 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
240 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
241 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
242 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
243 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
244 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
245 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
246
247 def test_getattr(self):
248 self.assertEqual(re.match("(a)", "a").pos, 0)
249 self.assertEqual(re.match("(a)", "a").endpos, 1)
250 self.assertEqual(re.match("(a)", "a").string, "a")
251 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
252 self.assertNotEqual(re.match("(a)", "a").re, None)
253
254 def test_special_escapes(self):
255 self.assertEqual(re.search(r"\b(b.)\b",
256 "abcd abc bcd bx").group(1), "bx")
257 self.assertEqual(re.search(r"\B(b.)\B",
258 "abc bcd bc abxd").group(1), "bx")
259 self.assertEqual(re.search(r"\b(b.)\b",
260 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
261 self.assertEqual(re.search(r"\B(b.)\B",
262 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
263 self.assertEqual(re.search(r"\b(b.)\b",
264 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
265 self.assertEqual(re.search(r"\B(b.)\B",
266 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
267 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
268 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
269 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
270 self.assertEqual(re.search(r"\b(b.)\b",
271 u"abcd abc bcd bx").group(1), "bx")
272 self.assertEqual(re.search(r"\B(b.)\B",
273 u"abc bcd bc abxd").group(1), "bx")
274 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
275 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
276 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
277 self.assertEqual(re.search(r"\d\D\w\W\s\S",
278 "1aa! a").group(0), "1aa! a")
279 self.assertEqual(re.search(r"\d\D\w\W\s\S",
280 "1aa! a", re.LOCALE).group(0), "1aa! a")
281 self.assertEqual(re.search(r"\d\D\w\W\s\S",
282 "1aa! a", re.UNICODE).group(0), "1aa! a")
283
284 def test_ignore_case(self):
285 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
286 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
287
288 def test_bigcharset(self):
289 self.assertEqual(re.match(u"([\u2222\u2223])",
290 u"\u2222").group(1), u"\u2222")
291 self.assertEqual(re.match(u"([\u2222\u2223])",
292 u"\u2222", re.UNICODE).group(1), u"\u2222")
293
294 def test_anyall(self):
295 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
296 "a\nb")
297 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
298 "a\n\nb")
299
300 def test_non_consuming(self):
301 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
302 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
303 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
304 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
305 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
306 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
307 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
308
309 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
310 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
311 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
312 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
313
314 def test_ignore_case(self):
315 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
316 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
317 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
318 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
319 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
320 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
321 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
322 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
323
324 def test_category(self):
325 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
326
327 def test_getlower(self):
328 import _sre
329 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
330 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
331 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
332
333 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
334 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
335
336 def test_not_literal(self):
337 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
338 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
339
340 def test_search_coverage(self):
341 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
342 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
343
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000344 def test_re_escape(self):
345 p=""
346 for i in range(0, 256):
347 p = p + chr(i)
348 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
349 True)
350 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000351
Skip Montanaro1e703c62003-04-25 15:40:28 +0000352 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000353 self.assertEqual(pat.match(p) is not None, True)
354 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000355
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000356 def test_pickling(self):
357 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000358 self.pickle_test(pickle)
359 import cPickle
360 self.pickle_test(cPickle)
361
362 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000363 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
364 s = pickle.dumps(oldpat)
365 newpat = pickle.loads(s)
366 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000367
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000368 def test_constants(self):
369 self.assertEqual(re.I, re.IGNORECASE)
370 self.assertEqual(re.L, re.LOCALE)
371 self.assertEqual(re.M, re.MULTILINE)
372 self.assertEqual(re.S, re.DOTALL)
373 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000374
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000375 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000376 for flag in [re.I, re.M, re.X, re.S, re.L]:
377 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000378
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000379 def test_sre_character_literals(self):
380 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
381 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
382 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
383 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
384 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
385 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
386 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
387 self.assertRaises(re.error, re.match, "\911", "")
388
389 def test_bug_113254(self):
390 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
391 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
392 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
393
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000394 def test_bug_527371(self):
395 # bug described in patches 527371/672491
396 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
397 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
398 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
399 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
400 self.assertEqual(re.match("((a))", "a").lastindex, 1)
401
402 def test_bug_545855(self):
403 # bug 545855 -- This pattern failed to cause a compile error as it
404 # should, instead provoking a TypeError.
405 self.assertRaises(re.error, re.compile, 'foo[a-')
406
407 def test_bug_418626(self):
408 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
409 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
410 # pattern '*?' on a long string.
411 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
412 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
413 20003)
414 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000415 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000416 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000417 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000418
419 def test_bug_612074(self):
420 pat=u"["+re.escape(u"\u2039")+u"]"
421 self.assertEqual(re.compile(pat) and 1, 1)
422
Skip Montanaro1e703c62003-04-25 15:40:28 +0000423 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000424 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000425 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000426 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
427 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
428 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000429
430 def test_scanner(self):
431 def s_ident(scanner, token): return token
432 def s_operator(scanner, token): return "op%s" % token
433 def s_float(scanner, token): return float(token)
434 def s_int(scanner, token): return int(token)
435
436 scanner = Scanner([
437 (r"[a-zA-Z_]\w*", s_ident),
438 (r"\d+\.\d*", s_float),
439 (r"\d+", s_int),
440 (r"=|\+|-|\*|/", s_operator),
441 (r"\s+", None),
442 ])
443
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000444 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
445
Skip Montanaro1e703c62003-04-25 15:40:28 +0000446 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
447 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
448 'op+', 'bar'], ''))
449
Skip Montanaro5ba00542003-04-25 16:00:14 +0000450 def test_bug_448951(self):
451 # bug 448951 (similar to 429357, but with single char match)
452 # (Also test greedy matches.)
453 for op in '','?','*':
454 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
455 (None, None))
456 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
457 ('a:', 'a'))
458
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000459 def test_bug_725106(self):
460 # capturing groups in alternatives in repeats
461 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
462 ('b', 'a'))
463 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
464 ('c', 'b'))
465 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
466 ('b', None))
467 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
468 ('b', None))
469 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
470 ('b', 'a'))
471 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
472 ('c', 'b'))
473 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
474 ('b', None))
475 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
476 ('b', None))
477
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000478 def test_bug_725149(self):
479 # mark_stack_base restoring before restoring marks
480 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
481 ('a', None))
482 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
483 ('a', None, None))
484
Just van Rossum12723ba2003-07-02 20:03:04 +0000485 def test_bug_764548(self):
486 # bug 764548, re.compile() barfs on str/unicode subclasses
487 try:
488 unicode
489 except NameError:
490 return # no problem if we have no unicode
491 class my_unicode(unicode): pass
492 pat = re.compile(my_unicode("abc"))
493 self.assertEqual(pat.match("xyz"), None)
494
Skip Montanaro5ba00542003-04-25 16:00:14 +0000495 def test_finditer(self):
496 iter = re.finditer(r":+", "a:b::c:::d")
497 self.assertEqual([item.group(0) for item in iter],
498 [":", "::", ":::"])
499
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000500 def test_bug_926075(self):
501 try:
502 unicode
503 except NameError:
504 return # no problem if we have no unicode
505 self.assert_(re.compile('bug_926075') is not
506 re.compile(eval("u'bug_926075'")))
507
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000508 def test_bug_931848(self):
509 try:
510 unicode
511 except NameError:
512 pass
513 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
514 self.assertEqual(re.compile(pattern).split("a.b.c"),
515 ['a','b','c'])
516
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000517def run_re_tests():
518 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
519 if verbose:
520 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000521 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000522 # To save time, only run the first and last 10 tests
523 #tests = tests[:10] + tests[-10:]
524 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000525
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000526 for t in tests:
527 sys.stdout.flush()
528 pattern = s = outcome = repl = expected = None
529 if len(t) == 5:
530 pattern, s, outcome, repl, expected = t
531 elif len(t) == 3:
532 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000533 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000534 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
535
Guido van Rossum41360a41998-03-26 19:42:58 +0000536 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000537 obj = re.compile(pattern)
538 except re.error:
539 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000540 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000541 print '=== Syntax error:', t
542 except KeyboardInterrupt: raise KeyboardInterrupt
543 except:
544 print '*** Unexpected error ***', t
545 if verbose:
546 traceback.print_exc(file=sys.stdout)
547 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000548 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000549 result = obj.search(s)
550 except re.error, msg:
551 print '=== Unexpected exception', t, repr(msg)
552 if outcome == SYNTAX_ERROR:
553 # This should have been a syntax error; forget it.
554 pass
555 elif outcome == FAIL:
556 if result is None: pass # No match, as expected
557 else: print '=== Succeeded incorrectly', t
558 elif outcome == SUCCEED:
559 if result is not None:
560 # Matched, as expected, so now we compute the
561 # result string and compare it to our expected result.
562 start, end = result.span(0)
563 vardict={'found': result.group(0),
564 'groups': result.group(),
565 'flags': result.re.flags}
566 for i in range(1, 100):
567 try:
568 gi = result.group(i)
569 # Special hack because else the string concat fails:
570 if gi is None:
571 gi = "None"
572 except IndexError:
573 gi = "Error"
574 vardict['g%d' % i] = gi
575 for i in result.re.groupindex.keys():
576 try:
577 gi = result.group(i)
578 if gi is None:
579 gi = "None"
580 except IndexError:
581 gi = "Error"
582 vardict[i] = gi
583 repl = eval(repl, vardict)
584 if repl != expected:
585 print '=== grouping error', t,
586 print repr(repl) + ' should be ' + repr(expected)
587 else:
588 print '=== Failed incorrectly', t
589
590 # Try the match on a unicode string, and check that it
591 # still succeeds.
592 try:
593 result = obj.search(unicode(s, "latin-1"))
594 if result is None:
595 print '=== Fails on unicode match', t
596 except NameError:
597 continue # 1.5.2
598 except TypeError:
599 continue # unicode test case
600
601 # Try the match on a unicode pattern, and check that it
602 # still succeeds.
603 obj=re.compile(unicode(pattern, "latin-1"))
604 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000605 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000606 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000607
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000608 # Try the match with the search area limited to the extent
609 # of the match and see if it still succeeds. \B will
610 # break (because it won't match at the end or start of a
611 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000612
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000613 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
614 and result is not None:
615 obj = re.compile(pattern)
616 result = obj.search(s, result.start(0), result.end(0) + 1)
617 if result is None:
618 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000619
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000620 # Try the match with IGNORECASE enabled, and check that it
621 # still succeeds.
622 obj = re.compile(pattern, re.IGNORECASE)
623 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000624 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000625 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000626
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000627 # Try the match with LOCALE enabled, and check that it
628 # still succeeds.
629 obj = re.compile(pattern, re.LOCALE)
630 result = obj.search(s)
631 if result is None:
632 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000633
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000634 # Try the match with UNICODE locale enabled, and check
635 # that it still succeeds.
636 obj = re.compile(pattern, re.UNICODE)
637 result = obj.search(s)
638 if result is None:
639 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000640
def test_main():
    """Run the ReTests unittest suite, then the table-driven re_tests cases."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000644
# Run both suites when this file is executed as a script.
if __name__ == "__main__":
    test_main()