blob: eab995de6e6565cdf0ca200793654a474607ba93 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Walter Dörwald21d3a322003-05-01 17:45:56 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Skip Montanaro1e703c62003-04-25 15:40:28 +00006from sre import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
Just van Rossum6802c6e2003-07-02 14:36:59 +000012# WARNING: Don't change details in these tests if you don't know
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000013# what you're doing. Some of these tests were carefully modeled to
14# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
Skip Montanaro8ed06da2003-04-24 19:43:18 +000038 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000039 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +000086 def test_sub_template_numeric_escape(self):
87 # bug 776311 and friends
88 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
89 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
90 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
91 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
92 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
93 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
94 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
95
96 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
97 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
98
99 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
100 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
101 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
102 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
103 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
104
105 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
106 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000107
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000108 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
109 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
110 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
111 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
112 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
113 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
114 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
115 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
116 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
117 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
118 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
119 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
120
121 # in python2.3 (etc), these loop endlessly in sre_parser.py
122 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
123 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
124 'xz8')
125 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
126 'xza')
127
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000128 def test_qualified_re_sub(self):
129 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
130 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000131
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000132 def test_bug_114660(self):
133 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
134 'hello there')
135
136 def test_bug_462270(self):
137 # Test for empty sub() behaviour, see SF bug #462270
138 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
139 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
140
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000141 def test_symbolic_refs(self):
142 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
143 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
144 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
145 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
146 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
147 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
148 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
149 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000150 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000151
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000152 def test_re_subn(self):
153 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
154 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
155 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
156 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
157 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000158
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000159 def test_re_split(self):
160 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
161 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
162 self.assertEqual(re.split("(:*)", ":a:b::c"),
163 ['', ':', 'a', ':', 'b', '::', 'c'])
164 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
165 self.assertEqual(re.split("(:)*", ":a:b::c"),
166 ['', ':', 'a', ':', 'b', ':', 'c'])
167 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
168 ['', ':', 'a', ':b::', 'c'])
169 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
170 ['', None, ':', 'a', None, ':', '', 'b', None, '',
171 None, '::', 'c'])
172 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
173 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000174
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000175 def test_qualified_re_split(self):
176 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
177 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
178 self.assertEqual(re.split("(:)", ":a:b::c", 2),
179 ['', ':', 'a', ':', 'b::c'])
180 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
181 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000182
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000183 def test_re_findall(self):
184 self.assertEqual(re.findall(":+", "abc"), [])
185 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
186 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
187 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
188 (":", ":"),
189 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000190
Skip Montanaro5ba00542003-04-25 16:00:14 +0000191 def test_bug_117612(self):
192 self.assertEqual(re.findall(r"(a|(b))", "aba"),
193 [("a", ""),("b", "b"),("a", "")])
194
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000196 self.assertEqual(re.match('a', 'a').groups(), ())
197 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
198 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
199 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
200 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000201
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000202 pat = re.compile('((a)|(b))(c)?')
203 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
204 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
205 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
206 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
207 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000208
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000209 # A single group
210 m = re.match('(a)', 'a')
211 self.assertEqual(m.group(0), 'a')
212 self.assertEqual(m.group(0), 'a')
213 self.assertEqual(m.group(1), 'a')
214 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000215
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000216 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
217 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
218 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
219 (None, 'b', None))
220 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000221
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000222 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000223 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
224 ('(', 'a'))
225 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
226 (None, 'a'))
227 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
228 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
229 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
230 ('a', 'b'))
231 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
232 (None, 'd'))
233 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
234 (None, 'd'))
235 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
236 ('a', ''))
237
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000238 # Tests for bug #1177831: exercise groups other than the first group
239 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
240 self.assertEqual(p.match('abc').groups(),
241 ('a', 'b', 'c'))
242 self.assertEqual(p.match('ad').groups(),
243 ('a', None, 'd'))
244 self.assertEqual(p.match('abd'), None)
245 self.assertEqual(p.match('ac'), None)
246
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000247
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000248 def test_re_groupref(self):
249 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
250 ('|', 'a'))
251 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
252 (None, 'a'))
253 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
254 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
255 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
256 ('a', 'a'))
257 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
258 (None, None))
259
260 def test_groupdict(self):
261 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
262 'first second').groupdict(),
263 {'first':'first', 'second':'second'})
264
265 def test_expand(self):
266 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
267 "first second")
268 .expand(r"\2 \1 \g<second> \g<first>"),
269 "second first second first")
270
271 def test_repeat_minmax(self):
272 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
273 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
274 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
275 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
276
277 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
278 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
279 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
280 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
281 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
282 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
283 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
284 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
285
286 self.assertEqual(re.match("^x{1}$", "xxx"), None)
287 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
288 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
289 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
290
291 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
292 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
293 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
294 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
295 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
296 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
297 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
298 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
299
300 def test_getattr(self):
301 self.assertEqual(re.match("(a)", "a").pos, 0)
302 self.assertEqual(re.match("(a)", "a").endpos, 1)
303 self.assertEqual(re.match("(a)", "a").string, "a")
304 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
305 self.assertNotEqual(re.match("(a)", "a").re, None)
306
307 def test_special_escapes(self):
308 self.assertEqual(re.search(r"\b(b.)\b",
309 "abcd abc bcd bx").group(1), "bx")
310 self.assertEqual(re.search(r"\B(b.)\B",
311 "abc bcd bc abxd").group(1), "bx")
312 self.assertEqual(re.search(r"\b(b.)\b",
313 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
314 self.assertEqual(re.search(r"\B(b.)\B",
315 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
316 self.assertEqual(re.search(r"\b(b.)\b",
317 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
318 self.assertEqual(re.search(r"\B(b.)\B",
319 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
320 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
321 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
322 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
323 self.assertEqual(re.search(r"\b(b.)\b",
324 u"abcd abc bcd bx").group(1), "bx")
325 self.assertEqual(re.search(r"\B(b.)\B",
326 u"abc bcd bc abxd").group(1), "bx")
327 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
328 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
329 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
330 self.assertEqual(re.search(r"\d\D\w\W\s\S",
331 "1aa! a").group(0), "1aa! a")
332 self.assertEqual(re.search(r"\d\D\w\W\s\S",
333 "1aa! a", re.LOCALE).group(0), "1aa! a")
334 self.assertEqual(re.search(r"\d\D\w\W\s\S",
335 "1aa! a", re.UNICODE).group(0), "1aa! a")
336
337 def test_ignore_case(self):
338 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
339 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
340
341 def test_bigcharset(self):
342 self.assertEqual(re.match(u"([\u2222\u2223])",
343 u"\u2222").group(1), u"\u2222")
344 self.assertEqual(re.match(u"([\u2222\u2223])",
345 u"\u2222", re.UNICODE).group(1), u"\u2222")
346
347 def test_anyall(self):
348 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
349 "a\nb")
350 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
351 "a\n\nb")
352
353 def test_non_consuming(self):
354 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
355 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
356 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
357 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
358 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
359 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
360 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
361
362 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
363 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
364 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
365 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
366
367 def test_ignore_case(self):
368 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
369 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
370 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
371 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
372 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
373 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
374 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
375 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
376
377 def test_category(self):
378 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
379
380 def test_getlower(self):
381 import _sre
382 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
383 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
384 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
385
386 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
387 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
388
389 def test_not_literal(self):
390 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
391 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
392
393 def test_search_coverage(self):
394 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
395 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
396
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000397 def test_re_escape(self):
398 p=""
399 for i in range(0, 256):
400 p = p + chr(i)
401 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
402 True)
403 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000404
Skip Montanaro1e703c62003-04-25 15:40:28 +0000405 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000406 self.assertEqual(pat.match(p) is not None, True)
407 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000408
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000409 def test_pickling(self):
410 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000411 self.pickle_test(pickle)
412 import cPickle
413 self.pickle_test(cPickle)
414
415 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000416 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
417 s = pickle.dumps(oldpat)
418 newpat = pickle.loads(s)
419 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000420
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000421 def test_constants(self):
422 self.assertEqual(re.I, re.IGNORECASE)
423 self.assertEqual(re.L, re.LOCALE)
424 self.assertEqual(re.M, re.MULTILINE)
425 self.assertEqual(re.S, re.DOTALL)
426 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000427
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000428 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000429 for flag in [re.I, re.M, re.X, re.S, re.L]:
430 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000431
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000432 def test_sre_character_literals(self):
433 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
434 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
435 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
436 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
437 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
438 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
439 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
440 self.assertRaises(re.error, re.match, "\911", "")
441
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000442 def test_sre_character_class_literals(self):
443 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
444 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
445 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
446 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
447 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
448 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
449 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
450 self.assertRaises(re.error, re.match, "[\911]", "")
451
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000452 def test_bug_113254(self):
453 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
454 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
455 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
456
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000457 def test_bug_527371(self):
458 # bug described in patches 527371/672491
459 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
460 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
461 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
462 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
463 self.assertEqual(re.match("((a))", "a").lastindex, 1)
464
465 def test_bug_545855(self):
466 # bug 545855 -- This pattern failed to cause a compile error as it
467 # should, instead provoking a TypeError.
468 self.assertRaises(re.error, re.compile, 'foo[a-')
469
470 def test_bug_418626(self):
471 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
472 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
473 # pattern '*?' on a long string.
474 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
475 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
476 20003)
477 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000478 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000479 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000480 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000481
482 def test_bug_612074(self):
483 pat=u"["+re.escape(u"\u2039")+u"]"
484 self.assertEqual(re.compile(pat) and 1, 1)
485
Skip Montanaro1e703c62003-04-25 15:40:28 +0000486 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000487 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000488 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000489 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
490 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
491 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000492
493 def test_scanner(self):
494 def s_ident(scanner, token): return token
495 def s_operator(scanner, token): return "op%s" % token
496 def s_float(scanner, token): return float(token)
497 def s_int(scanner, token): return int(token)
498
499 scanner = Scanner([
500 (r"[a-zA-Z_]\w*", s_ident),
501 (r"\d+\.\d*", s_float),
502 (r"\d+", s_int),
503 (r"=|\+|-|\*|/", s_operator),
504 (r"\s+", None),
505 ])
506
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000507 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
508
Skip Montanaro1e703c62003-04-25 15:40:28 +0000509 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
510 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
511 'op+', 'bar'], ''))
512
Skip Montanaro5ba00542003-04-25 16:00:14 +0000513 def test_bug_448951(self):
514 # bug 448951 (similar to 429357, but with single char match)
515 # (Also test greedy matches.)
516 for op in '','?','*':
517 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
518 (None, None))
519 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
520 ('a:', 'a'))
521
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000522 def test_bug_725106(self):
523 # capturing groups in alternatives in repeats
524 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
525 ('b', 'a'))
526 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
527 ('c', 'b'))
528 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
529 ('b', None))
530 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
531 ('b', None))
532 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
533 ('b', 'a'))
534 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
535 ('c', 'b'))
536 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
537 ('b', None))
538 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
539 ('b', None))
540
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000541 def test_bug_725149(self):
542 # mark_stack_base restoring before restoring marks
543 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
544 ('a', None))
545 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
546 ('a', None, None))
547
Just van Rossum12723ba2003-07-02 20:03:04 +0000548 def test_bug_764548(self):
549 # bug 764548, re.compile() barfs on str/unicode subclasses
550 try:
551 unicode
552 except NameError:
553 return # no problem if we have no unicode
554 class my_unicode(unicode): pass
555 pat = re.compile(my_unicode("abc"))
556 self.assertEqual(pat.match("xyz"), None)
557
Skip Montanaro5ba00542003-04-25 16:00:14 +0000558 def test_finditer(self):
559 iter = re.finditer(r":+", "a:b::c:::d")
560 self.assertEqual([item.group(0) for item in iter],
561 [":", "::", ":::"])
562
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000563 def test_bug_926075(self):
564 try:
565 unicode
566 except NameError:
567 return # no problem if we have no unicode
568 self.assert_(re.compile('bug_926075') is not
569 re.compile(eval("u'bug_926075'")))
570
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000571 def test_bug_931848(self):
572 try:
573 unicode
574 except NameError:
575 pass
576 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
577 self.assertEqual(re.compile(pattern).split("a.b.c"),
578 ['a','b','c'])
579
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000580 def test_bug_581080(self):
581 iter = re.finditer(r"\s", "a b")
582 self.assertEqual(iter.next().span(), (1,2))
583 self.assertRaises(StopIteration, iter.next)
584
585 scanner = re.compile(r"\s").scanner("a b")
586 self.assertEqual(scanner.search().span(), (1, 2))
587 self.assertEqual(scanner.search(), None)
588
589 def test_bug_817234(self):
590 iter = re.finditer(r".*", "asdf")
591 self.assertEqual(iter.next().span(), (0, 4))
592 self.assertEqual(iter.next().span(), (4, 4))
593 self.assertRaises(StopIteration, iter.next)
594
595
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000596def run_re_tests():
597 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
598 if verbose:
599 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000600 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000601 # To save time, only run the first and last 10 tests
602 #tests = tests[:10] + tests[-10:]
603 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000604
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000605 for t in tests:
606 sys.stdout.flush()
607 pattern = s = outcome = repl = expected = None
608 if len(t) == 5:
609 pattern, s, outcome, repl, expected = t
610 elif len(t) == 3:
611 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000612 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000613 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
614
Guido van Rossum41360a41998-03-26 19:42:58 +0000615 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000616 obj = re.compile(pattern)
617 except re.error:
618 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000619 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000620 print '=== Syntax error:', t
621 except KeyboardInterrupt: raise KeyboardInterrupt
622 except:
623 print '*** Unexpected error ***', t
624 if verbose:
625 traceback.print_exc(file=sys.stdout)
626 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000627 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000628 result = obj.search(s)
629 except re.error, msg:
630 print '=== Unexpected exception', t, repr(msg)
631 if outcome == SYNTAX_ERROR:
632 # This should have been a syntax error; forget it.
633 pass
634 elif outcome == FAIL:
635 if result is None: pass # No match, as expected
636 else: print '=== Succeeded incorrectly', t
637 elif outcome == SUCCEED:
638 if result is not None:
639 # Matched, as expected, so now we compute the
640 # result string and compare it to our expected result.
641 start, end = result.span(0)
642 vardict={'found': result.group(0),
643 'groups': result.group(),
644 'flags': result.re.flags}
645 for i in range(1, 100):
646 try:
647 gi = result.group(i)
648 # Special hack because else the string concat fails:
649 if gi is None:
650 gi = "None"
651 except IndexError:
652 gi = "Error"
653 vardict['g%d' % i] = gi
654 for i in result.re.groupindex.keys():
655 try:
656 gi = result.group(i)
657 if gi is None:
658 gi = "None"
659 except IndexError:
660 gi = "Error"
661 vardict[i] = gi
662 repl = eval(repl, vardict)
663 if repl != expected:
664 print '=== grouping error', t,
665 print repr(repl) + ' should be ' + repr(expected)
666 else:
667 print '=== Failed incorrectly', t
668
669 # Try the match on a unicode string, and check that it
670 # still succeeds.
671 try:
672 result = obj.search(unicode(s, "latin-1"))
673 if result is None:
674 print '=== Fails on unicode match', t
675 except NameError:
676 continue # 1.5.2
677 except TypeError:
678 continue # unicode test case
679
680 # Try the match on a unicode pattern, and check that it
681 # still succeeds.
682 obj=re.compile(unicode(pattern, "latin-1"))
683 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000684 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000685 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000686
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000687 # Try the match with the search area limited to the extent
688 # of the match and see if it still succeeds. \B will
689 # break (because it won't match at the end or start of a
690 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000691
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000692 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
693 and result is not None:
694 obj = re.compile(pattern)
695 result = obj.search(s, result.start(0), result.end(0) + 1)
696 if result is None:
697 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000698
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000699 # Try the match with IGNORECASE enabled, and check that it
700 # still succeeds.
701 obj = re.compile(pattern, re.IGNORECASE)
702 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000703 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000704 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000705
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000706 # Try the match with LOCALE enabled, and check that it
707 # still succeeds.
708 obj = re.compile(pattern, re.LOCALE)
709 result = obj.search(s)
710 if result is None:
711 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000712
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000713 # Try the match with UNICODE locale enabled, and check
714 # that it still succeeds.
715 obj = re.compile(pattern, re.UNICODE)
716 result = obj.search(s)
717 if result is None:
718 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000719
def test_main():
    """Run the ReTests unittest suite, then the table-driven re_tests checks."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000723
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()