blob: d23c49b07ea64a90ae91d7448db3441b7b371226 [file] [log] [blame]
Brett Cannon1cd02472008-09-09 01:52:27 +00001from test.support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00003from re import Scanner
Ezio Melottid2114eb2011-03-25 14:08:44 +02004import sys
5import string
6import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00007from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
# Misc tests from Tim Peters' re.doc
10
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000018
19 def test_weakref(self):
20 s = 'QabbbcR'
21 x = re.compile('ab+c')
22 y = proxy(x)
23 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
24
Skip Montanaro8ed06da2003-04-24 19:43:18 +000025 def test_search_star_plus(self):
26 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
27 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
28 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
29 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000030 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000031 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
32 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
33 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
34 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000035 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000036
def bump_num(self, matchobj):
    # Replacement callback: parse the whole match as a decimal integer and
    # return that value plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000040
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 def test_basic_re_sub(self):
42 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
43 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
44 '9.3 -3 24x100y')
45 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
46 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000047
Skip Montanaro8ed06da2003-04-24 19:43:18 +000048 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
49 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000050
Skip Montanaro8ed06da2003-04-24 19:43:18 +000051 s = r"\1\1"
52 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
53 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
54 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000055
Skip Montanaro8ed06da2003-04-24 19:43:18 +000056 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
57 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000060
Skip Montanaro8ed06da2003-04-24 19:43:18 +000061 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
62 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
63 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
65 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000066
Skip Montanaro8ed06da2003-04-24 19:43:18 +000067 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000068
Skip Montanaro2726fcd2003-04-25 14:31:54 +000069 def test_bug_449964(self):
70 # fails for group followed by other escape
71 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
72 'xx\bxx\b')
73
74 def test_bug_449000(self):
75 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000076 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n')
78 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000084
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000085 def test_bug_1661(self):
86 # Verify that flags do not get silently ignored with compiled patterns
87 pattern = re.compile('.')
88 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
89 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
90 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
91 self.assertRaises(ValueError, re.compile, pattern, re.I)
92
Guido van Rossum92f8f3e2008-09-10 14:30:50 +000093 def test_bug_3629(self):
94 # A regex that triggered a bug in the sre-code validator
95 re.compile("(?P<quote>)(?(quote))")
96
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +000097 def test_sub_template_numeric_escape(self):
98 # bug 776311 and friends
99 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
100 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
101 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
102 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
103 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
104 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
105 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
106
107 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
108 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
109
110 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
111 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
112 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
113 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
114 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
115
116 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
117 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000118
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000119 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
120 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
121 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
122 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
123 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
124 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
125 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
126 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
127 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
128 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
129 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
130 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
131
132 # in python2.3 (etc), these loop endlessly in sre_parser.py
133 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
134 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
135 'xz8')
136 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
137 'xza')
138
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000139 def test_qualified_re_sub(self):
140 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
141 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000142
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000143 def test_bug_114660(self):
144 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
145 'hello there')
146
147 def test_bug_462270(self):
148 # Test for empty sub() behaviour, see SF bug #462270
149 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
150 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
151
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000152 def test_symbolic_refs(self):
153 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
154 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
155 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
156 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
157 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
158 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
159 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
160 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000161 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000162
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000163 def test_re_subn(self):
164 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
165 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
166 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
167 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
168 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000169
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000170 def test_re_split(self):
171 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
172 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
173 self.assertEqual(re.split("(:*)", ":a:b::c"),
174 ['', ':', 'a', ':', 'b', '::', 'c'])
175 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
176 self.assertEqual(re.split("(:)*", ":a:b::c"),
177 ['', ':', 'a', ':', 'b', ':', 'c'])
178 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
179 ['', ':', 'a', ':b::', 'c'])
180 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
181 ['', None, ':', 'a', None, ':', '', 'b', None, '',
182 None, '::', 'c'])
183 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
184 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000185
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000186 def test_qualified_re_split(self):
187 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
188 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
189 self.assertEqual(re.split("(:)", ":a:b::c", 2),
190 ['', ':', 'a', ':', 'b::c'])
191 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
192 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000193
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000194 def test_re_findall(self):
195 self.assertEqual(re.findall(":+", "abc"), [])
196 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
197 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
198 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
199 (":", ":"),
200 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000201
Skip Montanaro5ba00542003-04-25 16:00:14 +0000202 def test_bug_117612(self):
203 self.assertEqual(re.findall(r"(a|(b))", "aba"),
204 [("a", ""),("b", "b"),("a", "")])
205
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000206 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000207 self.assertEqual(re.match('a', 'a').groups(), ())
208 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
209 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
210 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
211 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000212
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000213 pat = re.compile('((a)|(b))(c)?')
214 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
215 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
216 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
217 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
218 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000219
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000220 # A single group
221 m = re.match('(a)', 'a')
222 self.assertEqual(m.group(0), 'a')
223 self.assertEqual(m.group(0), 'a')
224 self.assertEqual(m.group(1), 'a')
225 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000226
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000227 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
228 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
229 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
230 (None, 'b', None))
231 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000232
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000233 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000234 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
235 ('(', 'a'))
236 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
237 (None, 'a'))
238 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
239 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
240 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
241 ('a', 'b'))
242 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
243 (None, 'd'))
244 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
245 (None, 'd'))
246 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
247 ('a', ''))
248
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000249 # Tests for bug #1177831: exercise groups other than the first group
250 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
251 self.assertEqual(p.match('abc').groups(),
252 ('a', 'b', 'c'))
253 self.assertEqual(p.match('ad').groups(),
254 ('a', None, 'd'))
255 self.assertEqual(p.match('abd'), None)
256 self.assertEqual(p.match('ac'), None)
257
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000258
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000259 def test_re_groupref(self):
260 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
261 ('|', 'a'))
262 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
263 (None, 'a'))
264 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
265 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
266 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
267 ('a', 'a'))
268 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
269 (None, None))
270
271 def test_groupdict(self):
272 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
273 'first second').groupdict(),
274 {'first':'first', 'second':'second'})
275
276 def test_expand(self):
277 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
278 "first second")
279 .expand(r"\2 \1 \g<second> \g<first>"),
280 "second first second first")
281
282 def test_repeat_minmax(self):
283 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
284 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
285 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
286 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
287
288 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
289 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
290 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
291 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
292 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
293 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
294 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
295 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
296
297 self.assertEqual(re.match("^x{1}$", "xxx"), None)
298 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
299 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
300 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
301
302 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
303 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
304 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
305 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
306 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
307 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
308 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
309 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
310
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000311 self.assertEqual(re.match("^x{}$", "xxx"), None)
312 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
313
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000314 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000315 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000316 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000317 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
318 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
319 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
320 {'first': 1, 'other': 2})
321
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000322 self.assertEqual(re.match("(a)", "a").pos, 0)
323 self.assertEqual(re.match("(a)", "a").endpos, 1)
324 self.assertEqual(re.match("(a)", "a").string, "a")
325 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
326 self.assertNotEqual(re.match("(a)", "a").re, None)
327
328 def test_special_escapes(self):
329 self.assertEqual(re.search(r"\b(b.)\b",
330 "abcd abc bcd bx").group(1), "bx")
331 self.assertEqual(re.search(r"\B(b.)\B",
332 "abc bcd bc abxd").group(1), "bx")
333 self.assertEqual(re.search(r"\b(b.)\b",
334 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
335 self.assertEqual(re.search(r"\B(b.)\B",
336 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
337 self.assertEqual(re.search(r"\b(b.)\b",
338 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
339 self.assertEqual(re.search(r"\B(b.)\B",
340 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
341 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
342 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
343 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
344 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000345 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000346 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000347 "abc bcd bc abxd").group(1), "bx")
348 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
349 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
350 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000351 self.assertEqual(re.search(r"\d\D\w\W\s\S",
352 "1aa! a").group(0), "1aa! a")
353 self.assertEqual(re.search(r"\d\D\w\W\s\S",
354 "1aa! a", re.LOCALE).group(0), "1aa! a")
355 self.assertEqual(re.search(r"\d\D\w\W\s\S",
356 "1aa! a", re.UNICODE).group(0), "1aa! a")
357
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000358 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000359 self.assertEqual(re.match("([\u2222\u2223])",
360 "\u2222").group(1), "\u2222")
361 self.assertEqual(re.match("([\u2222\u2223])",
362 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000363
364 def test_anyall(self):
365 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
366 "a\nb")
367 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
368 "a\n\nb")
369
370 def test_non_consuming(self):
371 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
372 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
373 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
374 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
375 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
376 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
377 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
378
379 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
380 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
381 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
382 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
383
384 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000385 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
386 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000387 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
388 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
389 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
390 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
391 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
392 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
393 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
394 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
395
396 def test_category(self):
397 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
398
399 def test_getlower(self):
400 import _sre
401 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
402 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
403 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
404
405 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000406 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000407
408 def test_not_literal(self):
409 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
410 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
411
412 def test_search_coverage(self):
413 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
414 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
415
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the match has
    # the given text and span.  With neither `match` nor `span` given,
    # the pattern is expected to match the whole of `text`.  Giving only
    # one of the two raises ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000429
Ezio Melottid2114eb2011-03-25 14:08:44 +0200430 def test_re_escape(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300431 alnum_chars = string.ascii_letters + string.digits + '_'
Ezio Melottid2114eb2011-03-25 14:08:44 +0200432 p = ''.join(chr(i) for i in range(256))
433 for c in p:
434 if c in alnum_chars:
435 self.assertEqual(re.escape(c), c)
436 elif c == '\x00':
437 self.assertEqual(re.escape(c), '\\000')
438 else:
439 self.assertEqual(re.escape(c), '\\' + c)
440 self.assertMatch(re.escape(c), c)
441 self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000442
Guido van Rossum698280d2008-09-10 17:44:35 +0000443 def test_re_escape_byte(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300444 alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
Ezio Melottid2114eb2011-03-25 14:08:44 +0200445 p = bytes(range(256))
446 for i in p:
Guido van Rossum698280d2008-09-10 17:44:35 +0000447 b = bytes([i])
Ezio Melottid2114eb2011-03-25 14:08:44 +0200448 if b in alnum_chars:
449 self.assertEqual(re.escape(b), b)
450 elif i == 0:
451 self.assertEqual(re.escape(b), b'\\000')
452 else:
453 self.assertEqual(re.escape(b), b'\\' + b)
454 self.assertMatch(re.escape(b), b)
455 self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000456
Ezio Melotti7b9e97b2011-03-25 14:09:33 +0200457 def test_re_escape_non_ascii(self):
458 s = 'xxx\u2620\u2620\u2620xxx'
459 s_escaped = re.escape(s)
460 self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
461 self.assertMatch(s_escaped, s)
462 self.assertMatch('.%s+.' % re.escape('\u2620'), s,
463 'x\u2620\u2620\u2620x', (2, 7), re.search)
464
465 def test_re_escape_non_ascii_bytes(self):
466 b = 'y\u2620y\u2620y'.encode('utf-8')
467 b_escaped = re.escape(b)
468 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
469 self.assertMatch(b_escaped, b)
470 res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
471 self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000472
def pickle_test(self, pickle):
    # Helper: round-trip a compiled pattern through the given pickle-like
    # module (anything offering dumps() and loads()) and require the
    # result to compare equal to the original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000478
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000479 def test_constants(self):
480 self.assertEqual(re.I, re.IGNORECASE)
481 self.assertEqual(re.L, re.LOCALE)
482 self.assertEqual(re.M, re.MULTILINE)
483 self.assertEqual(re.S, re.DOTALL)
484 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000485
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000486 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000487 for flag in [re.I, re.M, re.X, re.S, re.L]:
488 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000489
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000490 def test_sre_character_literals(self):
491 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
492 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
493 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
494 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
495 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
496 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
497 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
498 self.assertRaises(re.error, re.match, "\911", "")
499
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000500 def test_sre_character_class_literals(self):
501 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
502 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
503 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
504 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
505 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
506 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
507 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
508 self.assertRaises(re.error, re.match, "[\911]", "")
509
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000510 def test_bug_113254(self):
511 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
512 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
513 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
514
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000515 def test_bug_527371(self):
516 # bug described in patches 527371/672491
517 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
518 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
519 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
520 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
521 self.assertEqual(re.match("((a))", "a").lastindex, 1)
522
523 def test_bug_545855(self):
524 # bug 545855 -- This pattern failed to cause a compile error as it
525 # should, instead provoking a TypeError.
526 self.assertRaises(re.error, re.compile, 'foo[a-')
527
528 def test_bug_418626(self):
529 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
530 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
531 # pattern '*?' on a long string.
532 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
533 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
534 20003)
535 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000536 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000537 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000538 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000539
540 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000541 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000542 self.assertEqual(re.compile(pat) and 1, 1)
543
Skip Montanaro1e703c62003-04-25 15:40:28 +0000544 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000545 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000546 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000547 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
548 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
549 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000550
551 def test_scanner(self):
552 def s_ident(scanner, token): return token
553 def s_operator(scanner, token): return "op%s" % token
554 def s_float(scanner, token): return float(token)
555 def s_int(scanner, token): return int(token)
556
557 scanner = Scanner([
558 (r"[a-zA-Z_]\w*", s_ident),
559 (r"\d+\.\d*", s_float),
560 (r"\d+", s_int),
561 (r"=|\+|-|\*|/", s_operator),
562 (r"\s+", None),
563 ])
564
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000565 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
566
Skip Montanaro1e703c62003-04-25 15:40:28 +0000567 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
568 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
569 'op+', 'bar'], ''))
570
Skip Montanaro5ba00542003-04-25 16:00:14 +0000571 def test_bug_448951(self):
572 # bug 448951 (similar to 429357, but with single char match)
573 # (Also test greedy matches.)
574 for op in '','?','*':
575 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
576 (None, None))
577 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
578 ('a:', 'a'))
579
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000580 def test_bug_725106(self):
581 # capturing groups in alternatives in repeats
582 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
583 ('b', 'a'))
584 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
585 ('c', 'b'))
586 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
587 ('b', None))
588 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
589 ('b', None))
590 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
591 ('b', 'a'))
592 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
593 ('c', 'b'))
594 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
595 ('b', None))
596 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
597 ('b', None))
598
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000599 def test_bug_725149(self):
600 # mark_stack_base restoring before restoring marks
601 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
602 ('a', None))
603 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
604 ('a', None, None))
605
Just van Rossum12723ba2003-07-02 20:03:04 +0000606 def test_bug_764548(self):
607 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000608 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000609 pat = re.compile(my_unicode("abc"))
610 self.assertEqual(pat.match("xyz"), None)
611
Skip Montanaro5ba00542003-04-25 16:00:14 +0000612 def test_finditer(self):
613 iter = re.finditer(r":+", "a:b::c:::d")
614 self.assertEqual([item.group(0) for item in iter],
615 [":", "::", ":::"])
616
Thomas Wouters40a088d2008-03-18 20:19:54 +0000617 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000618 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000619 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000620
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000621 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000622 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000623 self.assertEqual(re.compile(pattern).split("a.b.c"),
624 ['a','b','c'])
625
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000626 def test_bug_581080(self):
627 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000628 self.assertEqual(next(iter).span(), (1,2))
629 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000630
631 scanner = re.compile(r"\s").scanner("a b")
632 self.assertEqual(scanner.search().span(), (1, 2))
633 self.assertEqual(scanner.search(), None)
634
635 def test_bug_817234(self):
636 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000637 self.assertEqual(next(iter).span(), (0, 4))
638 self.assertEqual(next(iter).span(), (4, 4))
639 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000640
Mark Dickinson1f268282009-07-28 17:22:36 +0000641 def test_bug_6561(self):
642 # '\d' should match characters in Unicode category 'Nd'
643 # (Number, Decimal Digit), but not those in 'Nl' (Number,
644 # Letter) or 'No' (Number, Other).
645 decimal_digits = [
646 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
647 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
648 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
649 ]
650 for x in decimal_digits:
651 self.assertEqual(re.match('^\d$', x).group(0), x)
652
653 not_decimal_digits = [
654 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
655 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
656 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
657 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
658 ]
659 for x in not_decimal_digits:
660 self.assertIsNone(re.match('^\d$', x))
661
Guido van Rossumd8faa362007-04-27 19:54:29 +0000662 def test_empty_array(self):
663 # SF buf 1647541
664 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000665 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000666 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000667 self.assertEqual(re.compile(b"bla").match(a), None)
668 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000669
Christian Heimes072c0f12008-01-03 23:01:04 +0000670 def test_inline_flags(self):
671 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000672 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
673 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000674
675 p = re.compile(upper_char, re.I | re.U)
676 q = p.match(lower_char)
677 self.assertNotEqual(q, None)
678
679 p = re.compile(lower_char, re.I | re.U)
680 q = p.match(upper_char)
681 self.assertNotEqual(q, None)
682
683 p = re.compile('(?i)' + upper_char, re.U)
684 q = p.match(lower_char)
685 self.assertNotEqual(q, None)
686
687 p = re.compile('(?i)' + lower_char, re.U)
688 q = p.match(upper_char)
689 self.assertNotEqual(q, None)
690
691 p = re.compile('(?iu)' + upper_char)
692 q = p.match(lower_char)
693 self.assertNotEqual(q, None)
694
695 p = re.compile('(?iu)' + lower_char)
696 q = p.match(upper_char)
697 self.assertNotEqual(q, None)
698
Christian Heimes25bb7832008-01-11 16:17:00 +0000699 def test_dollar_matches_twice(self):
700 "$ matches the end of string, and just before the terminating \n"
701 pattern = re.compile('$')
702 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
703 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
704 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
705
706 pattern = re.compile('$', re.MULTILINE)
707 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
708 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
709 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
710
Antoine Pitroufd036452008-08-19 17:56:33 +0000711 def test_bytes_str_mixing(self):
712 # Mixing str and bytes is disallowed
713 pat = re.compile('.')
714 bpat = re.compile(b'.')
715 self.assertRaises(TypeError, pat.match, b'b')
716 self.assertRaises(TypeError, bpat.match, 'b')
717 self.assertRaises(TypeError, pat.sub, b'b', 'c')
718 self.assertRaises(TypeError, pat.sub, 'b', b'c')
719 self.assertRaises(TypeError, pat.sub, b'b', b'c')
720 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
721 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
722 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
723
724 def test_ascii_and_unicode_flag(self):
725 # String patterns
726 for flags in (0, re.UNICODE):
727 pat = re.compile('\xc0', flags | re.IGNORECASE)
728 self.assertNotEqual(pat.match('\xe0'), None)
729 pat = re.compile('\w', flags)
730 self.assertNotEqual(pat.match('\xe0'), None)
731 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
732 self.assertEqual(pat.match('\xe0'), None)
733 pat = re.compile('(?a)\xc0', re.IGNORECASE)
734 self.assertEqual(pat.match('\xe0'), None)
735 pat = re.compile('\w', re.ASCII)
736 self.assertEqual(pat.match('\xe0'), None)
737 pat = re.compile('(?a)\w')
738 self.assertEqual(pat.match('\xe0'), None)
739 # Bytes patterns
740 for flags in (0, re.ASCII):
741 pat = re.compile(b'\xc0', re.IGNORECASE)
742 self.assertEqual(pat.match(b'\xe0'), None)
743 pat = re.compile(b'\w')
744 self.assertEqual(pat.match(b'\xe0'), None)
745 # Incompatibilities
746 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
747 self.assertRaises(ValueError, re.compile, b'(?u)\w')
748 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
749 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
750 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
751 self.assertRaises(ValueError, re.compile, '(?au)\w')
752
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000753 def test_bug_6509(self):
754 # Replacement strings of both types must parse properly.
755 # all strings
756 pat = re.compile('a(\w)')
757 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
758 pat = re.compile('a(.)')
759 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
760 pat = re.compile('..')
761 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
762
763 # all bytes
764 pat = re.compile(b'a(\w)')
765 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
766 pat = re.compile(b'a(.)')
767 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
768 pat = re.compile(b'..')
769 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
770
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000771 def test_dealloc(self):
772 # issue 3299: check for segfault in debug build
773 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +0000774 # the overflow limit is different on wide and narrow builds and it
775 # depends on the definition of SRE_CODE (see sre.h).
776 # 2**128 should be big enough to overflow on both. For smaller values
777 # a RuntimeError is raised instead of OverflowError.
778 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000779 self.assertRaises(TypeError, re.finditer, "a", {})
780 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Victor Stinner5abeafb2010-03-04 21:59:53 +0000781 self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000782
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200783 def test_search_dot_unicode(self):
784 self.assertIsNotNone(re.search("123.*-", '123abc-'))
785 self.assertIsNotNone(re.search("123.*-", '123\xe9-'))
786 self.assertIsNotNone(re.search("123.*-", '123\u20ac-'))
787 self.assertIsNotNone(re.search("123.*-", '123\U0010ffff-'))
788 self.assertIsNotNone(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
789
def run_re_tests():
    """Run the table-driven regression cases from ``test.re_tests``.

    Every entry of ``tests`` is either ``(pattern, string, outcome)`` or
    ``(pattern, string, outcome, repl, expected)``.  The pattern is
    compiled and searched for in the string and the result is compared
    with ``outcome`` (``SUCCEED``, ``FAIL`` or ``SYNTAX_ERROR``).  For
    entries expected to succeed, the match is repeated as a bytes
    pattern, restricted to the matched range, and with the IGNORECASE,
    LOCALE and UNICODE flags.

    Problems are reported by printing a line starting with ``===`` or
    ``***`` to stdout; nothing is returned.

    Raises:
        ValueError: if a test tuple does not have 3 or 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            # Otherwise a syntax error is exactly what was expected.
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception
            # subclasses, so they propagate unchanged.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no search result for this entry; checking a stale
            # (or unbound) ``result`` below would be meaningless.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            pass
        elif outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
        elif outcome == SUCCEED:
            if result is not None:
                # Matched, as expected, so now we compute the
                # result string and compare it to our expected result.
                vardict = {'found': result.group(0),
                           'groups': result.group(),
                           'flags': result.re.flags}
                for i in range(1, 100):
                    try:
                        gi = result.group(i)
                        # Special hack because else the string concat fails:
                        if gi is None:
                            gi = "None"
                    except IndexError:
                        gi = "Error"
                    vardict['g%d' % i] = gi
                for name in result.re.groupindex:
                    try:
                        gi = result.group(name)
                        if gi is None:
                            gi = "None"
                    except IndexError:
                        gi = "Error"
                    vardict[name] = gi
                # ``repl`` is an expression taken from the test table
                # imported above, not external input, so eval() is used
                # to build the string to compare.
                repl = eval(repl, vardict)
                if repl != expected:
                    print('=== grouping error', t, end=' ')
                    print(repr(repl) + ' should be ' + repr(expected))
            else:
                print('=== Failed incorrectly', t)

            # Try the match with both pattern and string converted to
            # bytes, and check that it still succeeds.
            try:
                bpat = bytes(pattern, "ascii")
                bs = bytes(s, "ascii")
            except UnicodeEncodeError:
                # skip non-ascii tests
                pass
            else:
                try:
                    bpat = re.compile(bpat)
                except Exception:
                    print('=== Fails on bytes pattern compile', t)
                    if verbose:
                        traceback.print_exc(file=sys.stdout)
                else:
                    bytes_result = bpat.search(bs)
                    if bytes_result is None:
                        print('=== Fails on bytes pattern match', t)

            # Try the match with the search area limited to the extent
            # of the match and see if it still succeeds.  \B will
            # break (because it won't match at the end or start of a
            # string), so we'll ignore patterns that feature it.
            if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                    and result is not None:
                obj = re.compile(pattern)
                result = obj.search(s, result.start(0), result.end(0) + 1)
                if result is None:
                    print('=== Failed on range-limited match', t)

            # Try the match with IGNORECASE enabled, and check that it
            # still succeeds.
            obj = re.compile(pattern, re.IGNORECASE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on case-insensitive match', t)

            # Try the match with LOCALE enabled, and check that it
            # still succeeds.
            if '(?u)' not in pattern:
                obj = re.compile(pattern, re.LOCALE)
                result = obj.search(s)
                if result is None:
                    print('=== Fails on locale-sensitive match', t)

            # Try the match with UNICODE locale enabled, and check
            # that it still succeeds.
            obj = re.compile(pattern, re.UNICODE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000916
Gregory P. Smith5a631832010-07-27 05:31:29 +0000917
def test_main():
    """Run the ReTests unittest cases, then the table-driven re_tests."""
    # unittest-based cases first ...
    run_unittest(ReTests)
    # ... then the print-based checks driven by the re_tests table.
    run_re_tests()


# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_main()