blob: 5ad44dd28bd3ac8bb7b9a396da0804985066e210 [file] [log] [blame]
Brett Cannon1cd02472008-09-09 01:52:27 +00001from test.support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00003from re import Scanner
Ezio Melottid2114eb2011-03-25 14:08:44 +02004import os
5import sys
6import string
7import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
Just van Rossum6802c6e2003-07-02 14:36:59 +000012# WARNING: Don't change details in these tests if you don't know
Ezio Melotti42da6632011-03-15 05:18:48 +020013# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000014# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): read the whole match as a
    # decimal integer and return that integer plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
def test_basic_re_sub(self):
    # Inline flag, and a callable replacement with and without a count.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # The return value of a callable is inserted verbatim, whereas a
    # template string has its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Group references by name and by number.  The templates are raw
    # strings: '\g' is not a Python escape sequence, so a non-raw
    # literal only works because the unrecognised escape is kept as-is.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    # Escapes in a template: some become control characters, the others
    # are expected to stay as backslash + letter.
    self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    # Here the control characters are produced by Python itself, so the
    # literals are deliberately not raw.
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    # An empty match at the start of the string is still substituted.
    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000086 def test_bug_1661(self):
87 # Verify that flags do not get silently ignored with compiled patterns
88 pattern = re.compile('.')
89 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
90 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
91 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
92 self.assertRaises(ValueError, re.compile, pattern, re.I)
93
Guido van Rossum92f8f3e2008-09-10 14:30:50 +000094 def test_bug_3629(self):
95 # A regex that triggered a bug in the sre-code validator
96 re.compile("(?P<quote>)(?(quote))")
97
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # A backslash followed by '0', or by three octal digits, is expected
    # to denote a character code; digits that cannot extend the escape
    # are expected to be kept as literal text.
    self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
    self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
    self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
    self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
    self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
    self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
    self.assertEqual(re.sub('x', r'\117', 'x'), '\117')

    # At most three octal digits are consumed; a fourth digit is literal.
    self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
    self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

    # Escapes that start with '0' may be shorter than three digits.
    self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
    self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
    self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
    self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
    self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

    # Octal values above 0o377 are expected to keep only their low
    # eight bits (0o400 -> 0, 0o777 -> 0o377).
    self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
    self.assertEqual(re.sub('x', r'\777', 'x'), '\377')

    # The pattern 'x' has no groups, so anything parsed as a group
    # reference must raise re.error.
    self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
    self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
    self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    # With eleven groups in the pattern, r'\11' is a valid reference.
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                     'xz8')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                     'xza')
139
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000140 def test_qualified_re_sub(self):
141 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
142 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000143
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000144 def test_bug_114660(self):
145 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
146 'hello there')
147
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    # 'x*' can match the empty string, so a '-' is expected at every
    # position where it matches; 'x+' only replaces the real 'x'.
    self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
    self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
152
def test_symbolic_refs(self):
    # Malformed or unusable group references in a replacement template
    # must raise an exception rather than be substituted.
    # NOTE(review): '\g' is not a Python escape sequence, so these
    # non-raw literals depend on the backslash being kept -- consider
    # raw strings.
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
    # A well-formed but unknown group name is an IndexError instead.
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
    # Group 'b' (number 2) exists but does not take part in the match.
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000163
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000164 def test_re_subn(self):
165 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
166 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
167 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
168 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
169 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000170
def test_re_split(self):
    # Splitting on plain, repeated, capturing and non-capturing
    # separators.  Text matched by capturing groups is expected to be
    # included in the result list.
    self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
    self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:*)", ":a:b::c"),
                     ['', ':', 'a', ':', 'b', '::', 'c'])
    self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:)*", ":a:b::c"),
                     ['', ':', 'a', ':', 'b', ':', 'c'])
    self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                     ['', ':', 'a', ':b::', 'c'])
    # With two alternative groups, the one that did not match is
    # expected to show up as None.
    self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                     ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c'])
    self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                     ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000186
def test_qualified_re_split(self):
    # The third argument limits the number of splits; the unsplit
    # remainder is expected as the final list element.
    self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
    self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
    self.assertEqual(re.split("(:)", ":a:b::c", 2),
                     ['', ':', 'a', ':', 'b::c'])
    self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                     ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000194
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 def test_re_findall(self):
196 self.assertEqual(re.findall(":+", "abc"), [])
197 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
198 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
199 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
200 (":", ":"),
201 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000202
Skip Montanaro5ba00542003-04-25 16:00:14 +0000203 def test_bug_117612(self):
204 self.assertEqual(re.findall(r"(a|(b))", "aba"),
205 [("a", ""),("b", "b"),("a", "")])
206
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000207 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000208 self.assertEqual(re.match('a', 'a').groups(), ())
209 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
210 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
211 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
212 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000213
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000214 pat = re.compile('((a)|(b))(c)?')
215 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
216 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
217 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
218 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
219 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000220
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000221 # A single group
222 m = re.match('(a)', 'a')
223 self.assertEqual(m.group(0), 'a')
224 self.assertEqual(m.group(0), 'a')
225 self.assertEqual(m.group(1), 'a')
226 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000227
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000228 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
229 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
230 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
231 (None, 'b', None))
232 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000233
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000234 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000235 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
236 ('(', 'a'))
237 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
238 (None, 'a'))
239 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
240 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
241 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
242 ('a', 'b'))
243 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
244 (None, 'd'))
245 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
246 (None, 'd'))
247 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
248 ('a', ''))
249
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000250 # Tests for bug #1177831: exercise groups other than the first group
251 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
252 self.assertEqual(p.match('abc').groups(),
253 ('a', 'b', 'c'))
254 self.assertEqual(p.match('ad').groups(),
255 ('a', None, 'd'))
256 self.assertEqual(p.match('abd'), None)
257 self.assertEqual(p.match('ac'), None)
258
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000259
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000260 def test_re_groupref(self):
261 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
262 ('|', 'a'))
263 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
264 (None, 'a'))
265 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
266 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
267 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
268 ('a', 'a'))
269 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
270 (None, None))
271
272 def test_groupdict(self):
273 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
274 'first second').groupdict(),
275 {'first':'first', 'second':'second'})
276
277 def test_expand(self):
278 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
279 "first second")
280 .expand(r"\2 \1 \g<second> \g<first>"),
281 "second first second first")
282
283 def test_repeat_minmax(self):
284 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
285 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
286 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
287 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
288
289 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
290 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
291 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
292 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
293 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
294 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
295 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
296 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
297
298 self.assertEqual(re.match("^x{1}$", "xxx"), None)
299 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
300 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
301 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
302
303 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
304 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
305 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
306 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
307 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
308 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
309 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
310 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
311
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000312 self.assertEqual(re.match("^x{}$", "xxx"), None)
313 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
314
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000315 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000316 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000317 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000318 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
319 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
320 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
321 {'first': 1, 'other': 2})
322
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000323 self.assertEqual(re.match("(a)", "a").pos, 0)
324 self.assertEqual(re.match("(a)", "a").endpos, 1)
325 self.assertEqual(re.match("(a)", "a").string, "a")
326 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
327 self.assertNotEqual(re.match("(a)", "a").re, None)
328
def test_special_escapes(self):
    # Word-boundary escapes (\b, \B), with default flags, with
    # re.LOCALE and with re.UNICODE.
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.UNICODE).group(1), "bx")
    # With re.M, '^' and '$' match at line boundaries, while \A and \Z
    # still only match at the ends of the whole string.
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
    # The next five assertions repeat earlier ones verbatim.
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
    # Character-category escapes under each of the three flag settings.
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.LOCALE).group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.UNICODE).group(0), "1aa! a")
358
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000359 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000360 self.assertEqual(re.match("([\u2222\u2223])",
361 "\u2222").group(1), "\u2222")
362 self.assertEqual(re.match("([\u2222\u2223])",
363 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000364
365 def test_anyall(self):
366 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
367 "a\nb")
368 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
369 "a\n\nb")
370
371 def test_non_consuming(self):
372 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
373 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
374 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
375 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
376 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
377 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
378 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
379
380 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
381 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
382 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
383 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
384
385 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000386 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
387 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000388 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
389 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
390 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
391 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
392 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
393 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
394 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
395 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
396
397 def test_category(self):
398 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
399
def test_getlower(self):
    # _sre is imported locally; getlower(code, flags) is expected to
    # lower-case 'A' for no flags, re.LOCALE and re.UNICODE alike.
    import _sre
    self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

    # Case-insensitive matching through the public API (the assertion
    # appears twice).
    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000408
409 def test_not_literal(self):
410 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
411 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
412
413 def test_search_coverage(self):
414 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
415 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
416
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the matched
    # text and its span equal `match` and `span`.  When both are
    # omitted the pattern must match the whole of `text`; giving only
    # one of them raises ValueError.
    omitted = (match is None, span is None)
    if all(omitted):
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    elif any(omitted):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    result = matcher(pattern, text)
    self.assertTrue(result)
    self.assertEqual(result.group(), match)
    self.assertEqual(result.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000430
def test_re_escape(self):
    # Check re.escape() on every character with code 0..255:
    # ASCII letters and digits are expected unchanged, NUL as '\000',
    # anything else prefixed with a backslash.
    alnum_chars = string.ascii_letters + string.digits
    p = ''.join(chr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            self.assertEqual(re.escape(c), c)
        elif c == '\x00':
            self.assertEqual(re.escape(c), '\\000')
        else:
            self.assertEqual(re.escape(c), '\\' + c)
        # The escaped form must match the original character.
        self.assertMatch(re.escape(c), c)
    # The same must hold for all 256 characters escaped as one string.
    self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000443
def test_re_escape_byte(self):
    # bytes counterpart of the str escape test: every byte value
    # 0..255 is escaped on its own and then all together.
    alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
    p = bytes(range(256))
    for i in p:
        # Iterating over bytes yields ints; rebuild a one-byte object.
        b = bytes([i])
        if b in alnum_chars:
            self.assertEqual(re.escape(b), b)
        elif i == 0:
            self.assertEqual(re.escape(b), b'\\000')
        else:
            self.assertEqual(re.escape(b), b'\\' + b)
        # The escaped form must match the original byte.
        self.assertMatch(re.escape(b), b)
    self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000457
def test_re_escape_non_ascii(self):
    # Each non-ASCII character is expected to be escaped with a single
    # backslash, and the escaped string must still match the original.
    s = 'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    # An escaped character can be followed by a quantifier: search for
    # the run of three, plus one character on each side.
    self.assertMatch('.%s+.' % re.escape('\u2620'), s,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)
465
def test_re_escape_non_ascii_bytes(self):
    # In UTF-8 encoded bytes, every byte of a multi-byte character is
    # expected to be escaped individually.
    b = 'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    # The escaped encoded character occurs twice in the subject.
    res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
    self.assertEqual(len(res), 2)
473
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the dumps()/loads() pair of
    # the module passed in as `pickle`; the restored pattern must
    # compare equal to the original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000479
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000480 def test_constants(self):
481 self.assertEqual(re.I, re.IGNORECASE)
482 self.assertEqual(re.L, re.LOCALE)
483 self.assertEqual(re.M, re.MULTILINE)
484 self.assertEqual(re.S, re.DOTALL)
485 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000486
def test_flags(self):
    # Compiling a str pattern with each of these flags, one at a time,
    # is expected to succeed and return a pattern object.
    for flag in [re.I, re.M, re.X, re.S, re.L]:
        self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000490
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000491 def test_sre_character_literals(self):
492 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
493 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
494 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
495 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
496 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
497 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
498 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
499 self.assertRaises(re.error, re.match, "\911", "")
500
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000501 def test_sre_character_class_literals(self):
502 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
503 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
504 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
505 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
506 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
507 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
508 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
509 self.assertRaises(re.error, re.match, "[\911]", "")
510
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000511 def test_bug_113254(self):
512 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
513 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
514 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
515
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000516 def test_bug_527371(self):
517 # bug described in patches 527371/672491
518 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
519 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
520 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
521 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
522 self.assertEqual(re.match("((a))", "a").lastindex, 1)
523
524 def test_bug_545855(self):
525 # bug 545855 -- This pattern failed to cause a compile error as it
526 # should, instead provoking a TypeError.
527 self.assertRaises(re.error, re.compile, 'foo[a-')
528
529 def test_bug_418626(self):
530 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
531 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
532 # pattern '*?' on a long string.
533 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
534 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
535 20003)
536 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000537 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000538 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000539 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000540
541 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000542 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000543 self.assertEqual(re.compile(pat) and 1, 1)
544
Skip Montanaro1e703c62003-04-25 15:40:28 +0000545 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000546 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000547 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000548 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
549 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
550 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000551
552 def test_scanner(self):
553 def s_ident(scanner, token): return token
554 def s_operator(scanner, token): return "op%s" % token
555 def s_float(scanner, token): return float(token)
556 def s_int(scanner, token): return int(token)
557
558 scanner = Scanner([
559 (r"[a-zA-Z_]\w*", s_ident),
560 (r"\d+\.\d*", s_float),
561 (r"\d+", s_int),
562 (r"=|\+|-|\*|/", s_operator),
563 (r"\s+", None),
564 ])
565
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000566 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
567
Skip Montanaro1e703c62003-04-25 15:40:28 +0000568 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
569 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
570 'op+', 'bar'], ''))
571
Skip Montanaro5ba00542003-04-25 16:00:14 +0000572 def test_bug_448951(self):
573 # bug 448951 (similar to 429357, but with single char match)
574 # (Also test greedy matches.)
575 for op in '','?','*':
576 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
577 (None, None))
578 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
579 ('a:', 'a'))
580
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000581 def test_bug_725106(self):
582 # capturing groups in alternatives in repeats
583 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
584 ('b', 'a'))
585 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
586 ('c', 'b'))
587 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
588 ('b', None))
589 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
590 ('b', None))
591 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
592 ('b', 'a'))
593 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
594 ('c', 'b'))
595 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
596 ('b', None))
597 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
598 ('b', None))
599
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000600 def test_bug_725149(self):
601 # mark_stack_base restoring before restoring marks
602 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
603 ('a', None))
604 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
605 ('a', None, None))
606
Just van Rossum12723ba2003-07-02 20:03:04 +0000607 def test_bug_764548(self):
608 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000609 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000610 pat = re.compile(my_unicode("abc"))
611 self.assertEqual(pat.match("xyz"), None)
612
Skip Montanaro5ba00542003-04-25 16:00:14 +0000613 def test_finditer(self):
614 iter = re.finditer(r":+", "a:b::c:::d")
615 self.assertEqual([item.group(0) for item in iter],
616 [":", "::", ":::"])
617
Thomas Wouters40a088d2008-03-18 20:19:54 +0000618 def test_bug_926075(self):
Georg Brandlab91fde2009-08-13 08:51:18 +0000619 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000620 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000621
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000622 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000623 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000624 self.assertEqual(re.compile(pattern).split("a.b.c"),
625 ['a','b','c'])
626
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000627 def test_bug_581080(self):
628 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000629 self.assertEqual(next(iter).span(), (1,2))
630 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000631
632 scanner = re.compile(r"\s").scanner("a b")
633 self.assertEqual(scanner.search().span(), (1, 2))
634 self.assertEqual(scanner.search(), None)
635
636 def test_bug_817234(self):
637 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000638 self.assertEqual(next(iter).span(), (0, 4))
639 self.assertEqual(next(iter).span(), (4, 4))
640 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000641
Guido van Rossumd8faa362007-04-27 19:54:29 +0000642 def test_empty_array(self):
643 # SF buf 1647541
644 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000645 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000646 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000647 self.assertEqual(re.compile(b"bla").match(a), None)
648 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000649
Christian Heimes072c0f12008-01-03 23:01:04 +0000650 def test_inline_flags(self):
651 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000652 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
653 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000654
655 p = re.compile(upper_char, re.I | re.U)
656 q = p.match(lower_char)
657 self.assertNotEqual(q, None)
658
659 p = re.compile(lower_char, re.I | re.U)
660 q = p.match(upper_char)
661 self.assertNotEqual(q, None)
662
663 p = re.compile('(?i)' + upper_char, re.U)
664 q = p.match(lower_char)
665 self.assertNotEqual(q, None)
666
667 p = re.compile('(?i)' + lower_char, re.U)
668 q = p.match(upper_char)
669 self.assertNotEqual(q, None)
670
671 p = re.compile('(?iu)' + upper_char)
672 q = p.match(lower_char)
673 self.assertNotEqual(q, None)
674
675 p = re.compile('(?iu)' + lower_char)
676 q = p.match(upper_char)
677 self.assertNotEqual(q, None)
678
Christian Heimes25bb7832008-01-11 16:17:00 +0000679 def test_dollar_matches_twice(self):
680 "$ matches the end of string, and just before the terminating \n"
681 pattern = re.compile('$')
682 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
683 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
684 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
685
686 pattern = re.compile('$', re.MULTILINE)
687 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
688 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
689 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
690
Antoine Pitroufd036452008-08-19 17:56:33 +0000691 def test_bytes_str_mixing(self):
692 # Mixing str and bytes is disallowed
693 pat = re.compile('.')
694 bpat = re.compile(b'.')
695 self.assertRaises(TypeError, pat.match, b'b')
696 self.assertRaises(TypeError, bpat.match, 'b')
697 self.assertRaises(TypeError, pat.sub, b'b', 'c')
698 self.assertRaises(TypeError, pat.sub, 'b', b'c')
699 self.assertRaises(TypeError, pat.sub, b'b', b'c')
700 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
701 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
702 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
703
704 def test_ascii_and_unicode_flag(self):
705 # String patterns
706 for flags in (0, re.UNICODE):
707 pat = re.compile('\xc0', flags | re.IGNORECASE)
708 self.assertNotEqual(pat.match('\xe0'), None)
709 pat = re.compile('\w', flags)
710 self.assertNotEqual(pat.match('\xe0'), None)
711 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
712 self.assertEqual(pat.match('\xe0'), None)
713 pat = re.compile('(?a)\xc0', re.IGNORECASE)
714 self.assertEqual(pat.match('\xe0'), None)
715 pat = re.compile('\w', re.ASCII)
716 self.assertEqual(pat.match('\xe0'), None)
717 pat = re.compile('(?a)\w')
718 self.assertEqual(pat.match('\xe0'), None)
719 # Bytes patterns
720 for flags in (0, re.ASCII):
721 pat = re.compile(b'\xc0', re.IGNORECASE)
722 self.assertEqual(pat.match(b'\xe0'), None)
723 pat = re.compile(b'\w')
724 self.assertEqual(pat.match(b'\xe0'), None)
725 # Incompatibilities
726 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
727 self.assertRaises(ValueError, re.compile, b'(?u)\w')
728 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
729 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
730 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
731 self.assertRaises(ValueError, re.compile, '(?au)\w')
732
Ezio Melottidab886a2010-03-06 15:27:04 +0000733 def test_bug_6509(self):
734 # Replacement strings of both types must parse properly.
735 # all strings
736 pat = re.compile('a(\w)')
737 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
738 pat = re.compile('a(.)')
739 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
740 pat = re.compile('..')
741 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
742
743 # all bytes
744 pat = re.compile(b'a(\w)')
745 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
746 pat = re.compile(b'a(.)')
747 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
748 pat = re.compile(b'..')
749 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
750
    def test_dealloc(self):
        """Issue 3299: invalid arguments must raise, not crash.

        Each call below is expected to fail with an ordinary Python
        exception; the issue being guarded against was a segfault in debug
        builds.
        """
        # issue 3299: check for segfault in debug build
        # NOTE(review): _sre is called directly here, presumably to reach the
        # low-level compiler without re's own argument checking -- confirm.
        import _sre
        # the overflow limit is different on wide and narrow builds and it
        # depends on the definition of SRE_CODE (see sre.h).
        # 2**128 should be big enough to overflow on both. For smaller values
        # a RuntimeError is raised instead of OverflowError.
        long_overflow = 2**128
        # A dict is not an acceptable subject to search.
        self.assertRaises(TypeError, re.finditer, "a", {})
        # A code list holding an oversized integer must be rejected.
        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
        # A dict is not an acceptable pattern argument.
        self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000762
def _re_test_vars(result):
    """Build the namespace used to evaluate a test tuple's result expression.

    The namespace holds ``found``, ``groups`` and ``flags``, the numbered
    groups as ``g1`` .. ``g99`` and every named group under its own name.
    A group that did not participate becomes the string "None" and a group
    that does not exist becomes "Error", so that result expressions can
    concatenate them as strings.
    """
    def group_text(key):
        try:
            value = result.group(key)
        except IndexError:
            return "Error"
        return "None" if value is None else value

    vardict = {'found': result.group(0),
               'groups': result.group(),
               'flags': result.re.flags}
    for i in range(1, 100):
        vardict['g%d' % i] = group_text(i)
    for name in result.re.groupindex:
        vardict[name] = group_text(name)
    return vardict


def run_re_tests():
    """Run the table-driven checks from test.re_tests, printing failures.

    Each entry of ``tests`` is ``(pattern, string, outcome)`` or
    ``(pattern, string, outcome, repl, expected)``.  Problems are reported
    with ``print`` rather than raised.  Entries expected to succeed are
    additionally retried as bytes, on a range-limited search, and with the
    IGNORECASE, LOCALE and UNICODE flags.

    Raises:
        ValueError: if a test tuple has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt is not an Exception subclass, so Ctrl-C
            # still propagates with its original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no search result for this entry; going on would
            # judge it by a previous entry's result (or by an unbound name).
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            # NOTE: eval() is applied to expressions taken from the test
            # table itself; it must never be fed external data.
            repl = eval(repl, _re_test_vars(result))
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                if bpat.search(bs) is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000889
def test_main():
    """Run the ReTests unit tests, then the table-driven re_tests checks."""
    run_unittest(ReTests)
    run_re_tests()


if __name__ == '__main__':
    test_main()