blob: 0f39eaddde69b6b0e7c609632b4727a7ed1bf615 [file] [log] [blame]
Brett Cannon1cd02472008-09-09 01:52:27 +00001from test.support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00003from re import Scanner
Ezio Melottid2114eb2011-03-25 14:08:44 +02004import sys
5import string
6import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00007from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000018
19 def test_weakref(self):
20 s = 'QabbbcR'
21 x = re.compile('ab+c')
22 y = proxy(x)
23 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
24
Skip Montanaro8ed06da2003-04-24 19:43:18 +000025 def test_search_star_plus(self):
26 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
27 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
28 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
29 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000030 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000031 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
32 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
33 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
34 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000035 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000036
Skip Montanaro8ed06da2003-04-24 19:43:18 +000037 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000038 int_value = int(matchobj.group(0))
39 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000040
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 def test_basic_re_sub(self):
42 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
43 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
44 '9.3 -3 24x100y')
45 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
46 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000047
Skip Montanaro8ed06da2003-04-24 19:43:18 +000048 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
49 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000050
Skip Montanaro8ed06da2003-04-24 19:43:18 +000051 s = r"\1\1"
52 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
53 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
54 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000055
Skip Montanaro8ed06da2003-04-24 19:43:18 +000056 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
57 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000060
Skip Montanaro8ed06da2003-04-24 19:43:18 +000061 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
62 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
63 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
65 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000066
Skip Montanaro8ed06da2003-04-24 19:43:18 +000067 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000068
Skip Montanaro2726fcd2003-04-25 14:31:54 +000069 def test_bug_449964(self):
70 # fails for group followed by other escape
71 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
72 'xx\bxx\b')
73
74 def test_bug_449000(self):
75 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000076 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n')
78 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000084
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000085 def test_bug_1661(self):
86 # Verify that flags do not get silently ignored with compiled patterns
87 pattern = re.compile('.')
88 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
89 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
90 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
91 self.assertRaises(ValueError, re.compile, pattern, re.I)
92
Guido van Rossum92f8f3e2008-09-10 14:30:50 +000093 def test_bug_3629(self):
94 # A regex that triggered a bug in the sre-code validator
95 re.compile("(?P<quote>)(?(quote))")
96
    def test_sub_template_numeric_escape(self):
        # bug 776311 and friends
        # Numeric escapes in a replacement template. The pattern 'x' has no
        # groups, so any escape treated as a group reference must fail.

        # Escapes starting with 0, or made of three octal digits, yield a
        # character code; a following non-octal digit is literal text.
        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')

        # Only three octal digits are consumed; the fourth is literal.
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

        # The expected values are the octal codes reduced modulo 256.
        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')

        # References to groups that the pattern does not define.
        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'

        # in python2.3 (etc), these loop endlessly in sre_parser.py
        # With eleven groups defined, r'\11' is a valid group reference.
        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                         'xz8')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                         'xza')
138
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000139 def test_qualified_re_sub(self):
140 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
141 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000142
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000143 def test_bug_114660(self):
144 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
145 'hello there')
146
    def test_bug_462270(self):
        # Test for empty sub() behaviour, see SF bug #462270
        # 'x*' can match the empty string, so '-' also appears at positions
        # that contain no 'x'.
        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
        # 'x+' needs at least one 'x', so only the literal 'x' is replaced.
        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
151
    def test_symbolic_refs(self):
        # Malformed or unusable \g<...> references in a replacement template.
        # Unterminated or syntactically invalid group names.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
        # A well-formed name that the pattern does not define.
        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
        # Group b / group 2 exists in the pattern but takes no part in a
        # match against 'xx'.
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
        # Negative group numbers are rejected.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000162
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000163 def test_re_subn(self):
164 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
165 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
166 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
167 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
168 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000169
    def test_re_split(self):
        # split() on ':' separators, with and without capturing groups.
        # Without groups only the pieces between separators are returned.
        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
        # A capturing group makes the separator text part of the result.
        self.assertEqual(re.split("(:*)", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', '::', 'c'])
        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
        self.assertEqual(re.split("(:)*", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        # With two alternative groups, the one that did not take part in a
        # given separator match shows up as None.
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000185
    def test_qualified_re_split(self):
        # split() with a positional limit of 2 splits: the remainder of the
        # string is returned unsplit as the last element.
        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
        # Captured separators are included and do not count as pieces.
        self.assertEqual(re.split("(:)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000193
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000194 def test_re_findall(self):
195 self.assertEqual(re.findall(":+", "abc"), [])
196 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
197 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
198 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
199 (":", ":"),
200 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000201
Skip Montanaro5ba00542003-04-25 16:00:14 +0000202 def test_bug_117612(self):
203 self.assertEqual(re.findall(r"(a|(b))", "aba"),
204 [("a", ""),("b", "b"),("a", "")])
205
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000206 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000207 self.assertEqual(re.match('a', 'a').groups(), ())
208 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
209 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
210 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
211 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000212
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000213 pat = re.compile('((a)|(b))(c)?')
214 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
215 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
216 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
217 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
218 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000219
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000220 # A single group
221 m = re.match('(a)', 'a')
222 self.assertEqual(m.group(0), 'a')
223 self.assertEqual(m.group(0), 'a')
224 self.assertEqual(m.group(1), 'a')
225 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000226
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000227 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
228 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
229 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
230 (None, 'b', None))
231 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000232
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000233 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000234 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
235 ('(', 'a'))
236 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
237 (None, 'a'))
238 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
239 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
240 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
241 ('a', 'b'))
242 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
243 (None, 'd'))
244 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
245 (None, 'd'))
246 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
247 ('a', ''))
248
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000249 # Tests for bug #1177831: exercise groups other than the first group
250 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
251 self.assertEqual(p.match('abc').groups(),
252 ('a', 'b', 'c'))
253 self.assertEqual(p.match('ad').groups(),
254 ('a', None, 'd'))
255 self.assertEqual(p.match('abd'), None)
256 self.assertEqual(p.match('ac'), None)
257
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000258
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000259 def test_re_groupref(self):
260 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
261 ('|', 'a'))
262 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
263 (None, 'a'))
264 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
265 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
266 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
267 ('a', 'a'))
268 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
269 (None, None))
270
271 def test_groupdict(self):
272 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
273 'first second').groupdict(),
274 {'first':'first', 'second':'second'})
275
276 def test_expand(self):
277 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
278 "first second")
279 .expand(r"\2 \1 \g<second> \g<first>"),
280 "second first second first")
281
282 def test_repeat_minmax(self):
283 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
284 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
285 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
286 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
287
288 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
289 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
290 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
291 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
292 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
293 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
294 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
295 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
296
297 self.assertEqual(re.match("^x{1}$", "xxx"), None)
298 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
299 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
300 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
301
302 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
303 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
304 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
305 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
306 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
307 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
308 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
309 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
310
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000311 self.assertEqual(re.match("^x{}$", "xxx"), None)
312 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
313
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000314 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000315 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000316 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000317 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
318 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
319 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
320 {'first': 1, 'other': 2})
321
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000322 self.assertEqual(re.match("(a)", "a").pos, 0)
323 self.assertEqual(re.match("(a)", "a").endpos, 1)
324 self.assertEqual(re.match("(a)", "a").string, "a")
325 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
326 self.assertNotEqual(re.match("(a)", "a").re, None)
327
    def test_special_escapes(self):
        # Special escapes (\b \B \A \Z and the character categories) with
        # default flags and with re.LOCALE / re.UNICODE.
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
        # With re.M, ^ and $ match at line boundaries while \A and \Z are
        # expected to match only at the ends of the whole string.
        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
        # NOTE(review): the next five assertions repeat earlier ones
        # verbatim; they are kept as-is.
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
        # Digit / non-digit, word / non-word, space / non-space in sequence.
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a").group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
357
    def test_string_boundaries(self):
        # See http://bugs.python.org/issue10713
        # Behaviour of \b (word boundary) and \B (non-boundary) at the
        # start and end of strings and in very short strings.
        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
                         "abc")
        # There's a word boundary at the start of a string.
        self.assertTrue(re.match(r"\b", "abc"))
        # A non-empty string includes a non-boundary zero-length match.
        self.assertTrue(re.search(r"\B", "abc"))
        # There is no non-boundary match at the start of a string.
        self.assertFalse(re.match(r"\B", "abc"))
        # However, an empty string contains no word boundaries, and also no
        # non-boundaries.
        self.assertEqual(re.search(r"\B", ""), None)
        # This one is questionable and different from the perlre behaviour,
        # but describes current behavior.
        self.assertEqual(re.search(r"\b", ""), None)
        # A single word-character string has two boundaries, but no
        # non-boundary gaps.
        self.assertEqual(len(re.findall(r"\b", "a")), 2)
        self.assertEqual(len(re.findall(r"\B", "a")), 0)
        # If there are no words, there are no boundaries
        # NOTE(review): the two assertions below are identical as written;
        # confirm whether the second was meant to use a longer
        # whitespace-only subject.
        self.assertEqual(len(re.findall(r"\b", " ")), 0)
        self.assertEqual(len(re.findall(r"\b", " ")), 0)
        # Can match around the whitespace.
        self.assertEqual(len(re.findall(r"\B", " ")), 2)
383
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000384 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000385 self.assertEqual(re.match("([\u2222\u2223])",
386 "\u2222").group(1), "\u2222")
387 self.assertEqual(re.match("([\u2222\u2223])",
388 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000389
390 def test_anyall(self):
391 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
392 "a\nb")
393 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
394 "a\n\nb")
395
396 def test_non_consuming(self):
397 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
398 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
399 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
400 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
401 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
402 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
403 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
404
405 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
406 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
407 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
408 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
409
410 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000411 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
412 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000413 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
414 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
415 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
416 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
417 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
418 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
419 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
420 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
421
422 def test_category(self):
423 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
424
    def test_getlower(self):
        # _sre is the private C accelerator module; getlower(code, flags)
        # is expected to map ord('A') to ord('a') for each flag setting.
        import _sre
        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

        # The same lowering seen through the public API (re.I).
        # NOTE(review): the two assertions below are identical.
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000433
434 def test_not_literal(self):
435 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
436 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
437
438 def test_search_coverage(self):
439 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
440 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
441
Ezio Melottid2114eb2011-03-25 14:08:44 +0200442 def assertMatch(self, pattern, text, match=None, span=None,
443 matcher=re.match):
444 if match is None and span is None:
445 # the pattern matches the whole text
446 match = text
447 span = (0, len(text))
448 elif match is None or span is None:
449 raise ValueError('If match is not None, span should be specified '
450 '(and vice versa).')
451 m = matcher(pattern, text)
452 self.assertTrue(m)
453 self.assertEqual(m.group(), match)
454 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000455
Ezio Melottid2114eb2011-03-25 14:08:44 +0200456 def test_re_escape(self):
457 alnum_chars = string.ascii_letters + string.digits
458 p = ''.join(chr(i) for i in range(256))
459 for c in p:
460 if c in alnum_chars:
461 self.assertEqual(re.escape(c), c)
462 elif c == '\x00':
463 self.assertEqual(re.escape(c), '\\000')
464 else:
465 self.assertEqual(re.escape(c), '\\' + c)
466 self.assertMatch(re.escape(c), c)
467 self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000468
Guido van Rossum698280d2008-09-10 17:44:35 +0000469 def test_re_escape_byte(self):
Ezio Melottid2114eb2011-03-25 14:08:44 +0200470 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
471 p = bytes(range(256))
472 for i in p:
Guido van Rossum698280d2008-09-10 17:44:35 +0000473 b = bytes([i])
Ezio Melottid2114eb2011-03-25 14:08:44 +0200474 if b in alnum_chars:
475 self.assertEqual(re.escape(b), b)
476 elif i == 0:
477 self.assertEqual(re.escape(b), b'\\000')
478 else:
479 self.assertEqual(re.escape(b), b'\\' + b)
480 self.assertMatch(re.escape(b), b)
481 self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000482
Ezio Melotti7b9e97b2011-03-25 14:09:33 +0200483 def test_re_escape_non_ascii(self):
484 s = 'xxx\u2620\u2620\u2620xxx'
485 s_escaped = re.escape(s)
486 self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
487 self.assertMatch(s_escaped, s)
488 self.assertMatch('.%s+.' % re.escape('\u2620'), s,
489 'x\u2620\u2620\u2620x', (2, 7), re.search)
490
491 def test_re_escape_non_ascii_bytes(self):
492 b = 'y\u2620y\u2620y'.encode('utf-8')
493 b_escaped = re.escape(b)
494 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
495 self.assertMatch(b_escaped, b)
496 res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
497 self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000498
Skip Montanaro1e703c62003-04-25 15:40:28 +0000499 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000500 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
501 s = pickle.dumps(oldpat)
502 newpat = pickle.loads(s)
503 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000504
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000505 def test_constants(self):
506 self.assertEqual(re.I, re.IGNORECASE)
507 self.assertEqual(re.L, re.LOCALE)
508 self.assertEqual(re.M, re.MULTILINE)
509 self.assertEqual(re.S, re.DOTALL)
510 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000511
    def test_flags(self):
        # Compiling the same pattern with each single flag must succeed and
        # return a pattern object.
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000515
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000516 def test_sre_character_literals(self):
517 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
518 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
519 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
520 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
521 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
522 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
523 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
524 self.assertRaises(re.error, re.match, "\911", "")
525
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000526 def test_sre_character_class_literals(self):
527 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
528 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
529 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
530 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
531 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
532 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
533 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
534 self.assertRaises(re.error, re.match, "[\911]", "")
535
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000536 def test_bug_113254(self):
537 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
538 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
539 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
540
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000541 def test_bug_527371(self):
542 # bug described in patches 527371/672491
543 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
544 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
545 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
546 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
547 self.assertEqual(re.match("((a))", "a").lastindex, 1)
548
549 def test_bug_545855(self):
550 # bug 545855 -- This pattern failed to cause a compile error as it
551 # should, instead provoking a TypeError.
552 self.assertRaises(re.error, re.compile, 'foo[a-')
553
554 def test_bug_418626(self):
555 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
556 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
557 # pattern '*?' on a long string.
558 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
559 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
560 20003)
561 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000562 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000563 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000564 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000565
566 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000567 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000568 self.assertEqual(re.compile(pat) and 1, 1)
569
Skip Montanaro1e703c62003-04-25 15:40:28 +0000570 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000571 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000572 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000573 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
574 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
575 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000576
577 def test_scanner(self):
578 def s_ident(scanner, token): return token
579 def s_operator(scanner, token): return "op%s" % token
580 def s_float(scanner, token): return float(token)
581 def s_int(scanner, token): return int(token)
582
583 scanner = Scanner([
584 (r"[a-zA-Z_]\w*", s_ident),
585 (r"\d+\.\d*", s_float),
586 (r"\d+", s_int),
587 (r"=|\+|-|\*|/", s_operator),
588 (r"\s+", None),
589 ])
590
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000591 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
592
Skip Montanaro1e703c62003-04-25 15:40:28 +0000593 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
594 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
595 'op+', 'bar'], ''))
596
Skip Montanaro5ba00542003-04-25 16:00:14 +0000597 def test_bug_448951(self):
598 # bug 448951 (similar to 429357, but with single char match)
599 # (Also test greedy matches.)
600 for op in '','?','*':
601 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
602 (None, None))
603 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
604 ('a:', 'a'))
605
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000606 def test_bug_725106(self):
607 # capturing groups in alternatives in repeats
608 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
609 ('b', 'a'))
610 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
611 ('c', 'b'))
612 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
613 ('b', None))
614 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
615 ('b', None))
616 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
617 ('b', 'a'))
618 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
619 ('c', 'b'))
620 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
621 ('b', None))
622 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
623 ('b', None))
624
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000625 def test_bug_725149(self):
626 # mark_stack_base restoring before restoring marks
627 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
628 ('a', None))
629 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
630 ('a', None, None))
631
Just van Rossum12723ba2003-07-02 20:03:04 +0000632 def test_bug_764548(self):
633 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000634 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000635 pat = re.compile(my_unicode("abc"))
636 self.assertEqual(pat.match("xyz"), None)
637
Skip Montanaro5ba00542003-04-25 16:00:14 +0000638 def test_finditer(self):
639 iter = re.finditer(r":+", "a:b::c:::d")
640 self.assertEqual([item.group(0) for item in iter],
641 [":", "::", ":::"])
642
Thomas Wouters40a088d2008-03-18 20:19:54 +0000643 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000644 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000645 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000646
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000647 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000648 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000649 self.assertEqual(re.compile(pattern).split("a.b.c"),
650 ['a','b','c'])
651
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000652 def test_bug_581080(self):
653 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000654 self.assertEqual(next(iter).span(), (1,2))
655 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000656
657 scanner = re.compile(r"\s").scanner("a b")
658 self.assertEqual(scanner.search().span(), (1, 2))
659 self.assertEqual(scanner.search(), None)
660
661 def test_bug_817234(self):
662 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000663 self.assertEqual(next(iter).span(), (0, 4))
664 self.assertEqual(next(iter).span(), (4, 4))
665 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000666
Mark Dickinson1f268282009-07-28 17:22:36 +0000667 def test_bug_6561(self):
668 # '\d' should match characters in Unicode category 'Nd'
669 # (Number, Decimal Digit), but not those in 'Nl' (Number,
670 # Letter) or 'No' (Number, Other).
671 decimal_digits = [
672 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
673 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
674 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
675 ]
676 for x in decimal_digits:
677 self.assertEqual(re.match('^\d$', x).group(0), x)
678
679 not_decimal_digits = [
680 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
681 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
682 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
683 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
684 ]
685 for x in not_decimal_digits:
686 self.assertIsNone(re.match('^\d$', x))
687
Guido van Rossumd8faa362007-04-27 19:54:29 +0000688 def test_empty_array(self):
689 # SF buf 1647541
690 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000691 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000692 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000693 self.assertEqual(re.compile(b"bla").match(a), None)
694 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000695
Christian Heimes072c0f12008-01-03 23:01:04 +0000696 def test_inline_flags(self):
697 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000698 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
699 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000700
701 p = re.compile(upper_char, re.I | re.U)
702 q = p.match(lower_char)
703 self.assertNotEqual(q, None)
704
705 p = re.compile(lower_char, re.I | re.U)
706 q = p.match(upper_char)
707 self.assertNotEqual(q, None)
708
709 p = re.compile('(?i)' + upper_char, re.U)
710 q = p.match(lower_char)
711 self.assertNotEqual(q, None)
712
713 p = re.compile('(?i)' + lower_char, re.U)
714 q = p.match(upper_char)
715 self.assertNotEqual(q, None)
716
717 p = re.compile('(?iu)' + upper_char)
718 q = p.match(lower_char)
719 self.assertNotEqual(q, None)
720
721 p = re.compile('(?iu)' + lower_char)
722 q = p.match(upper_char)
723 self.assertNotEqual(q, None)
724
Christian Heimes25bb7832008-01-11 16:17:00 +0000725 def test_dollar_matches_twice(self):
726 "$ matches the end of string, and just before the terminating \n"
727 pattern = re.compile('$')
728 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
729 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
730 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
731
732 pattern = re.compile('$', re.MULTILINE)
733 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
734 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
735 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
736
Antoine Pitroufd036452008-08-19 17:56:33 +0000737 def test_bytes_str_mixing(self):
738 # Mixing str and bytes is disallowed
739 pat = re.compile('.')
740 bpat = re.compile(b'.')
741 self.assertRaises(TypeError, pat.match, b'b')
742 self.assertRaises(TypeError, bpat.match, 'b')
743 self.assertRaises(TypeError, pat.sub, b'b', 'c')
744 self.assertRaises(TypeError, pat.sub, 'b', b'c')
745 self.assertRaises(TypeError, pat.sub, b'b', b'c')
746 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
747 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
748 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
749
750 def test_ascii_and_unicode_flag(self):
751 # String patterns
752 for flags in (0, re.UNICODE):
753 pat = re.compile('\xc0', flags | re.IGNORECASE)
754 self.assertNotEqual(pat.match('\xe0'), None)
755 pat = re.compile('\w', flags)
756 self.assertNotEqual(pat.match('\xe0'), None)
757 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
758 self.assertEqual(pat.match('\xe0'), None)
759 pat = re.compile('(?a)\xc0', re.IGNORECASE)
760 self.assertEqual(pat.match('\xe0'), None)
761 pat = re.compile('\w', re.ASCII)
762 self.assertEqual(pat.match('\xe0'), None)
763 pat = re.compile('(?a)\w')
764 self.assertEqual(pat.match('\xe0'), None)
765 # Bytes patterns
766 for flags in (0, re.ASCII):
767 pat = re.compile(b'\xc0', re.IGNORECASE)
768 self.assertEqual(pat.match(b'\xe0'), None)
769 pat = re.compile(b'\w')
770 self.assertEqual(pat.match(b'\xe0'), None)
771 # Incompatibilities
772 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
773 self.assertRaises(ValueError, re.compile, b'(?u)\w')
774 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
775 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
776 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
777 self.assertRaises(ValueError, re.compile, '(?au)\w')
778
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000779 def test_bug_6509(self):
780 # Replacement strings of both types must parse properly.
781 # all strings
782 pat = re.compile('a(\w)')
783 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
784 pat = re.compile('a(.)')
785 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
786 pat = re.compile('..')
787 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
788
789 # all bytes
790 pat = re.compile(b'a(\w)')
791 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
792 pat = re.compile(b'a(.)')
793 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
794 pat = re.compile(b'..')
795 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
796
def test_dealloc(self):
    """Bad arguments to the low-level compiler raise instead of crashing."""
    # issue 3299: check for segfault in debug build
    # NOTE(review): _sre is a private implementation module; the
    # three-argument call form used below presumably relies on the remaining
    # parameters being optional -- confirm against the interpreter under test.
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both. For smaller values
    # a RuntimeError is raised instead of OverflowError.
    long_overflow = 2**128
    # A dict is not an acceptable subject string.
    self.assertRaises(TypeError, re.finditer, "a", {})
    # A code word too large for the code type must be reported as overflow.
    self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
    # A dict passed as the pattern argument is rejected with TypeError.
    self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000808
def run_re_tests():
    """Run the table-driven regex cases from ``test.re_tests``.

    Each entry of ``tests`` is a 3-tuple ``(pattern, string, outcome)`` or a
    5-tuple ``(pattern, string, outcome, repl, expected)``.  The pattern is
    compiled and searched for in the string; the observed result is compared
    with ``outcome`` (SUCCEED, FAIL or SYNTAX_ERROR).  For 5-tuples that
    match, ``repl`` is evaluated as an expression over the match data and
    compared with ``expected``.  Cases expected to succeed are then retried
    as bytes, with a range-limited search, and with the IGNORECASE, LOCALE
    and UNICODE flags.

    Problems are reported by printing lines starting with ``===`` or ``***``
    to standard output; nothing is returned.

    Raises:
        ValueError: if a test tuple does not have 3 or 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')
    else:
        # To save time, only run the first and last 10 tests
        #tests = tests[:10] + tests[-10:]
        pass

    for t in tests:
        # Flush so that output printed so far is not lost if a case crashes.
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
            else:
                print('=== Syntax error:', t)
        # NOTE(review): raising the class creates a fresh exception; a bare
        # `raise` would keep the original traceback.
        except KeyboardInterrupt: raise KeyboardInterrupt
        # NOTE(review): bare except -- also intercepts SystemExit.
        except:
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
        else:
            try:
                result = obj.search(s)
            except re.error as msg:
                # NOTE(review): `result` is not assigned on this path, yet the
                # code below still reads it (stale value from a previous case,
                # or unbound on the first one).
                print('=== Unexpected exception', t, repr(msg))
            if outcome == SYNTAX_ERROR:
                # This should have been a syntax error; forget it.
                pass
            elif outcome == FAIL:
                if result is None: pass   # No match, as expected
                else: print('=== Succeeded incorrectly', t)
            elif outcome == SUCCEED:
                if result is not None:
                    # Matched, as expected, so now we compute the
                    # result string and compare it to our expected result.
                    start, end = result.span(0)
                    # Namespace in which the `repl` expression is evaluated.
                    vardict={'found': result.group(0),
                             'groups': result.group(),
                             'flags': result.re.flags}
                    # Expose numbered groups as g1..g99; groups that do not
                    # exist in the pattern are recorded as "Error".
                    for i in range(1, 100):
                        try:
                            gi = result.group(i)
                            # Special hack because else the string concat fails:
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict['g%d' % i] = gi
                    # Expose named groups under their own names.
                    for i in result.re.groupindex.keys():
                        try:
                            gi = result.group(i)
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict[i] = gi
                    # `repl` comes from the test table, not from external
                    # input, so evaluating it here is deliberate.
                    repl = eval(repl, vardict)
                    if repl != expected:
                        print('=== grouping error', t, end=' ')
                        print(repr(repl) + ' should be ' + repr(expected))
                else:
                    print('=== Failed incorrectly', t)

                # NOTE(review): the follow-up checks below are taken to belong
                # to the SUCCEED branch -- confirm against the canonical file,
                # since this copy has lost its indentation.

                # Try the match with both pattern and string converted to
                # bytes, and check that it still succeeds.
                try:
                    bpat = bytes(pattern, "ascii")
                    bs = bytes(s, "ascii")
                except UnicodeEncodeError:
                    # skip non-ascii tests
                    pass
                else:
                    try:
                        bpat = re.compile(bpat)
                    except Exception:
                        print('=== Fails on bytes pattern compile', t)
                        if verbose:
                            traceback.print_exc(file=sys.stdout)
                    else:
                        bytes_result = bpat.search(bs)
                        if bytes_result is None:
                            print('=== Fails on bytes pattern match', t)

                # Try the match with the search area limited to the extent
                # of the match and see if it still succeeds.  \B will
                # break (because it won't match at the end or start of a
                # string), so we'll ignore patterns that feature it.

                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                               and result is not None:
                    obj = re.compile(pattern)
                    result = obj.search(s, result.start(0), result.end(0) + 1)
                    if result is None:
                        print('=== Failed on range-limited match', t)

                # Try the match with IGNORECASE enabled, and check that it
                # still succeeds.
                obj = re.compile(pattern, re.IGNORECASE)
                result = obj.search(s)
                if result is None:
                    print('=== Fails on case-insensitive match', t)

                # Try the match with LOCALE enabled, and check that it
                # still succeeds.
                # Patterns carrying an inline (?u) are skipped for this check.
                if '(?u)' not in pattern:
                    obj = re.compile(pattern, re.LOCALE)
                    result = obj.search(s)
                    if result is None:
                        print('=== Fails on locale-sensitive match', t)

                # Try the match with UNICODE locale enabled, and check
                # that it still succeeds.
                obj = re.compile(pattern, re.UNICODE)
                result = obj.search(s)
                if result is None:
                    print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000935
Gregory P. Smith5a631832010-07-27 05:31:29 +0000936
def test_main():
    """Run the ReTests unittest cases, then the table-driven re_tests suite."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000940
# Run the full suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()