blob: 6b047e48dbb7fa2432fe4d277a50b39e9b2a0f17 [file] [log] [blame]
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
Benjamin Petersone48944b2012-03-07 14:50:25 -06002import io
Guido van Rossum8e0ce301997-07-11 19:34:44 +00003import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00004from re import Scanner
Ezio Melottid2114eb2011-03-25 14:08:44 +02005import sys
6import string
7import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Antoine Pitrou1f1888e2012-12-03 20:53:12 +010010from test.test_bigmem import character_size
11
12
Guido van Rossum23b22571997-07-17 22:36:14 +000013# Misc tests from Tim Peters' re.doc
14
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
18
Skip Montanaro8ed06da2003-04-24 19:43:18 +000019import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000020
Skip Montanaro8ed06da2003-04-24 19:43:18 +000021class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000022
def test_keep_buffer(self):
    # See bug 14212: while the finditer() iterator is alive, resizing the
    # bytearray it scans is expected to fail with BufferError.
    data = bytearray(b'x')
    matches = re.finditer(b'a', data)
    self.assertRaises(BufferError, data.extend, b'x'*400)
    list(matches)
    # Once the iterator is dropped and collected, the resize must succeed.
    del matches
    gc_collect()
    data.extend(b'x'*400)
33
Raymond Hettinger027bb632004-05-31 03:09:25 +000034 def test_weakref(self):
35 s = 'QabbbcR'
36 x = re.compile('ab+c')
37 y = proxy(x)
38 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
39
Skip Montanaro8ed06da2003-04-24 19:43:18 +000040 def test_search_star_plus(self):
41 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
42 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
43 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
44 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000045 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000046 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
47 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
48 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
49 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000050 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000051
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): read the whole match as a decimal
    # integer and return its successor as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000055
def test_basic_re_sub(self):
    # Literal, callable and template replacements.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is used verbatim; a template string has
    # its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Raw strings: '\g' is not a Python escape, so the non-raw spelling
    # only worked by relying on unknown escapes being passed through.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000083
Skip Montanaro2726fcd2003-04-25 14:31:54 +000084 def test_bug_449964(self):
85 # fails for group followed by other escape
86 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
87 'xx\bxx\b')
88
89 def test_bug_449000(self):
90 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000091 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
92 'abc\ndef\n')
93 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
94 'abc\ndef\n')
95 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
96 'abc\ndef\n')
97 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
98 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000099
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000100 def test_bug_1661(self):
101 # Verify that flags do not get silently ignored with compiled patterns
102 pattern = re.compile('.')
103 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
104 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
105 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
106 self.assertRaises(ValueError, re.compile, pattern, re.I)
107
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000108 def test_bug_3629(self):
109 # A regex that triggered a bug in the sre-code validator
110 re.compile("(?P<quote>)(?(quote))")
111
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # Templates that must be read as octal escapes (plus trailing text).
    octal_templates = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),

        (r'\400', '\0'),
        (r'\777', '\377'),
    ]
    for template, expected in octal_templates:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Templates that must be rejected when applied to a pattern with no
    # groups.
    rejected = [
        r'\1', r'\8', r'\9', r'\11', r'\18', r'\1a', r'\90', r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in rejected:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    ten_groups = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(ten_groups, r'\118', 'xyz'), 'xz8')
    self.assertEqual(re.sub(ten_groups, r'\11a', 'xyz'), 'xza')
153
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000154 def test_qualified_re_sub(self):
155 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
156 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000157
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000158 def test_bug_114660(self):
159 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
160 'hello there')
161
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    subject = 'abxd'
    for pattern, expected in (('x*', '-a-b-d-'), ('x+', 'ab-d')):
        self.assertEqual(re.sub(pattern, '-', subject), expected)
166
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200167 def test_symbolic_groups(self):
168 re.compile('(?P<a>x)(?P=a)(?(a)y)')
169 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
170 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
171 self.assertRaises(re.error, re.compile, '(?Px)')
172 self.assertRaises(re.error, re.compile, '(?P=)')
173 self.assertRaises(re.error, re.compile, '(?P=1)')
174 self.assertRaises(re.error, re.compile, '(?P=a)')
175 self.assertRaises(re.error, re.compile, '(?P=a1)')
176 self.assertRaises(re.error, re.compile, '(?P=a.)')
177 self.assertRaises(re.error, re.compile, '(?P<)')
178 self.assertRaises(re.error, re.compile, '(?P<>)')
179 self.assertRaises(re.error, re.compile, '(?P<1>)')
180 self.assertRaises(re.error, re.compile, '(?P<a.>)')
181 self.assertRaises(re.error, re.compile, '(?())')
182 self.assertRaises(re.error, re.compile, '(?(a))')
183 self.assertRaises(re.error, re.compile, '(?(1a))')
184 self.assertRaises(re.error, re.compile, '(?(a.))')
185
def test_symbolic_refs(self):
    # Malformed or unusable group references in a replacement template
    # must raise.  The templates are raw strings because backslash-g is
    # not a valid Python string escape.
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a a>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<1a1>', 'xx')
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\2', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000197
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000198 def test_re_subn(self):
199 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
200 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
201 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
202 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
203 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000204
def test_re_split(self):
    # split() with plain, optional, capturing and alternating separators,
    # all applied to the same subject.
    subject = ":a:b::c"
    cases = [
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]
    for pattern, expected in cases:
        self.assertEqual(re.split(pattern, subject), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000220
def test_qualified_re_split(self):
    # split() with a maxsplit of 2.
    cases = [
        (":", ":a:b::c", ['', 'a', 'b::c']),
        (':', 'a:b:c:d', ['a', 'b', 'c:d']),
        ("(:)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
        ("(:*)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
    ]
    for pattern, subject, expected in cases:
        self.assertEqual(re.split(pattern, subject, 2), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000228
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000229 def test_re_findall(self):
230 self.assertEqual(re.findall(":+", "abc"), [])
231 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
232 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
233 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
234 (":", ":"),
235 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000236
Skip Montanaro5ba00542003-04-25 16:00:14 +0000237 def test_bug_117612(self):
238 self.assertEqual(re.findall(r"(a|(b))", "aba"),
239 [("a", ""),("b", "b"),("a", "")])
240
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000241 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000242 self.assertEqual(re.match('a', 'a').groups(), ())
243 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
244 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
245 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
246 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000247
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000248 pat = re.compile('((a)|(b))(c)?')
249 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
250 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
251 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
252 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
253 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000254
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000255 # A single group
256 m = re.match('(a)', 'a')
257 self.assertEqual(m.group(0), 'a')
258 self.assertEqual(m.group(0), 'a')
259 self.assertEqual(m.group(1), 'a')
260 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000261
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000262 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
263 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
264 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
265 (None, 'b', None))
266 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000267
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000268 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000269 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
270 ('(', 'a'))
271 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
272 (None, 'a'))
273 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
274 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
275 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
276 ('a', 'b'))
277 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
278 (None, 'd'))
279 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
280 (None, 'd'))
281 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
282 ('a', ''))
283
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000284 # Tests for bug #1177831: exercise groups other than the first group
285 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
286 self.assertEqual(p.match('abc').groups(),
287 ('a', 'b', 'c'))
288 self.assertEqual(p.match('ad').groups(),
289 ('a', None, 'd'))
290 self.assertEqual(p.match('abd'), None)
291 self.assertEqual(p.match('ac'), None)
292
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000293
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000294 def test_re_groupref(self):
295 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
296 ('|', 'a'))
297 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
298 (None, 'a'))
299 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
300 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
301 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
302 ('a', 'a'))
303 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
304 (None, None))
305
306 def test_groupdict(self):
307 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
308 'first second').groupdict(),
309 {'first':'first', 'second':'second'})
310
311 def test_expand(self):
312 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
313 "first second")
314 .expand(r"\2 \1 \g<second> \g<first>"),
315 "second first second first")
316
317 def test_repeat_minmax(self):
318 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
319 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
320 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
321 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
322
323 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
324 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
325 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
326 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
327 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
328 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
329 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
330 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
331
332 self.assertEqual(re.match("^x{1}$", "xxx"), None)
333 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
334 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
335 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
336
337 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
338 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
339 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
340 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
341 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
342 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
343 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
344 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
345
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000346 self.assertEqual(re.match("^x{}$", "xxx"), None)
347 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
348
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000349 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000350 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000351 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000352 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
353 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
354 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
355 {'first': 1, 'other': 2})
356
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000357 self.assertEqual(re.match("(a)", "a").pos, 0)
358 self.assertEqual(re.match("(a)", "a").endpos, 1)
359 self.assertEqual(re.match("(a)", "a").string, "a")
360 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
361 self.assertNotEqual(re.match("(a)", "a").re, None)
362
def test_special_escapes(self):
    # \b and \B with default, LOCALE and UNICODE matching.
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.UNICODE).group(1), "bx")
    # Anchors under MULTILINE.  A second, identical copy of the default
    # \b / \B checks and of these three checks used to follow here; it
    # was removed because it exercised nothing new.
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
    # Character-category escapes with default, LOCALE and UNICODE matching.
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.LOCALE).group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.UNICODE).group(0), "1aa! a")
392
Ezio Melotti5a045b92012-02-29 11:48:44 +0200393 def test_string_boundaries(self):
394 # See http://bugs.python.org/issue10713
395 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
396 "abc")
397 # There's a word boundary at the start of a string.
398 self.assertTrue(re.match(r"\b", "abc"))
399 # A non-empty string includes a non-boundary zero-length match.
400 self.assertTrue(re.search(r"\B", "abc"))
401 # There is no non-boundary match at the start of a string.
402 self.assertFalse(re.match(r"\B", "abc"))
403 # However, an empty string contains no word boundaries, and also no
404 # non-boundaries.
405 self.assertEqual(re.search(r"\B", ""), None)
406 # This one is questionable and different from the perlre behaviour,
407 # but describes current behavior.
408 self.assertEqual(re.search(r"\b", ""), None)
409 # A single word-character string has two boundaries, but no
410 # non-boundary gaps.
411 self.assertEqual(len(re.findall(r"\b", "a")), 2)
412 self.assertEqual(len(re.findall(r"\B", "a")), 0)
413 # If there are no words, there are no boundaries
414 self.assertEqual(len(re.findall(r"\b", " ")), 0)
415 self.assertEqual(len(re.findall(r"\b", " ")), 0)
416 # Can match around the whitespace.
417 self.assertEqual(len(re.findall(r"\B", " ")), 2)
418
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000419 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000420 self.assertEqual(re.match("([\u2222\u2223])",
421 "\u2222").group(1), "\u2222")
422 self.assertEqual(re.match("([\u2222\u2223])",
423 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000424
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100425 def test_big_codesize(self):
426 # Issue #1160
427 r = re.compile('|'.join(('%d'%x for x in range(10000))))
428 self.assertIsNotNone(r.match('1000'))
429 self.assertIsNotNone(r.match('9999'))
430
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000431 def test_anyall(self):
432 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
433 "a\nb")
434 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
435 "a\n\nb")
436
437 def test_non_consuming(self):
438 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
439 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
440 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
441 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
442 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
443 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
444 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
445
446 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
447 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
448 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
449 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
450
451 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000452 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
453 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000454 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
455 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
456 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
457 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
458 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
459 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
460 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
461 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
462
463 def test_category(self):
464 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
465
def test_getlower(self):
    # _sre.getlower() must map 'A' to 'a' for each flag setting.
    import _sre
    self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

    # This assertion used to appear twice verbatim.
    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000474
475 def test_not_literal(self):
476 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
477 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
478
479 def test_search_coverage(self):
480 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
481 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
482
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the match has
    # the given text and span.  With neither `match` nor `span` given,
    # the pattern is expected to match the whole of `text`; giving only
    # one of the two raises ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000496
def test_re_escape(self):
    # Expected re.escape() output for each of the first 256 code points:
    # ASCII letters and digits unchanged, NUL as '\000', anything else
    # prefixed with a backslash.  The escaped form must match the input.
    alnum_chars = string.ascii_letters + string.digits
    p = ''.join(map(chr, range(256)))
    for c in p:
        if c in alnum_chars:
            expected = c
        elif c == '\x00':
            expected = '\\000'
        else:
            expected = '\\' + c
        self.assertEqual(re.escape(c), expected)
        self.assertMatch(re.escape(c), c)
    self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000509
def test_re_escape_byte(self):
    # Bytes counterpart of the str escape test: every byte value 0..255
    # is checked individually and then all together.
    alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
    p = bytes(range(256))
    for i in p:
        b = bytes([i])
        if b in alnum_chars:
            expected = b
        elif i == 0:
            expected = b'\\000'
        else:
            expected = b'\\' + b
        self.assertEqual(re.escape(b), expected)
        self.assertMatch(re.escape(b), b)
    self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000523
def test_re_escape_non_ascii(self):
    # Expected escaping of a non-ASCII character, and the escaped text
    # used inside a larger pattern.
    s = 'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    embedded = '.%s+.' % re.escape('\u2620')
    self.assertMatch(embedded, s, 'x\u2620\u2620\u2620x', (2, 7), re.search)
531
def test_re_escape_non_ascii_bytes(self):
    # Expected escaping of the UTF-8 encoding of a non-ASCII character,
    # byte by byte.
    b = 'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    skull = '\u2620'.encode('utf-8')
    res = re.findall(re.escape(skull), b)
    self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000539
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle module and
    # check that the result compares equal to the original.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    newpat = pickle.loads(pickle.dumps(oldpat))
    self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000545
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000546 def test_constants(self):
547 self.assertEqual(re.I, re.IGNORECASE)
548 self.assertEqual(re.L, re.LOCALE)
549 self.assertEqual(re.M, re.MULTILINE)
550 self.assertEqual(re.S, re.DOTALL)
551 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000552
def test_flags(self):
    # Compiling with each individual flag must yield a pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertNotEqual(compiled, None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000556
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000557 def test_sre_character_literals(self):
558 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
559 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
560 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
561 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
562 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
563 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
564 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
565 self.assertRaises(re.error, re.match, "\911", "")
566
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000567 def test_sre_character_class_literals(self):
568 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
569 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
570 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
571 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
572 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
573 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
574 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
575 self.assertRaises(re.error, re.match, "[\911]", "")
576
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000577 def test_bug_113254(self):
578 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
579 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
580 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
581
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000582 def test_bug_527371(self):
583 # bug described in patches 527371/672491
584 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
585 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
586 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
587 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
588 self.assertEqual(re.match("((a))", "a").lastindex, 1)
589
590 def test_bug_545855(self):
591 # bug 545855 -- This pattern failed to cause a compile error as it
592 # should, instead provoking a TypeError.
593 self.assertRaises(re.error, re.compile, 'foo[a-')
594
595 def test_bug_418626(self):
596 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
597 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
598 # pattern '*?' on a long string.
599 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
600 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
601 20003)
602 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000603 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000604 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000605 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000606
607 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000608 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000609 self.assertEqual(re.compile(pat) and 1, 1)
610
Skip Montanaro1e703c62003-04-25 15:40:28 +0000611 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000612 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000613 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000614 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
615 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
616 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000617
618 def test_scanner(self):
619 def s_ident(scanner, token): return token
620 def s_operator(scanner, token): return "op%s" % token
621 def s_float(scanner, token): return float(token)
622 def s_int(scanner, token): return int(token)
623
624 scanner = Scanner([
625 (r"[a-zA-Z_]\w*", s_ident),
626 (r"\d+\.\d*", s_float),
627 (r"\d+", s_int),
628 (r"=|\+|-|\*|/", s_operator),
629 (r"\s+", None),
630 ])
631
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000632 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
633
Skip Montanaro1e703c62003-04-25 15:40:28 +0000634 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
635 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
636 'op+', 'bar'], ''))
637
Skip Montanaro5ba00542003-04-25 16:00:14 +0000638 def test_bug_448951(self):
639 # bug 448951 (similar to 429357, but with single char match)
640 # (Also test greedy matches.)
641 for op in '','?','*':
642 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
643 (None, None))
644 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
645 ('a:', 'a'))
646
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000647 def test_bug_725106(self):
648 # capturing groups in alternatives in repeats
649 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
650 ('b', 'a'))
651 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
652 ('c', 'b'))
653 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
654 ('b', None))
655 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
656 ('b', None))
657 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
658 ('b', 'a'))
659 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
660 ('c', 'b'))
661 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
662 ('b', None))
663 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
664 ('b', None))
665
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000666 def test_bug_725149(self):
667 # mark_stack_base restoring before restoring marks
668 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
669 ('a', None))
670 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
671 ('a', None, None))
672
Just van Rossum12723ba2003-07-02 20:03:04 +0000673 def test_bug_764548(self):
674 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000675 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000676 pat = re.compile(my_unicode("abc"))
677 self.assertEqual(pat.match("xyz"), None)
678
Skip Montanaro5ba00542003-04-25 16:00:14 +0000679 def test_finditer(self):
680 iter = re.finditer(r":+", "a:b::c:::d")
681 self.assertEqual([item.group(0) for item in iter],
682 [":", "::", ":::"])
683
Thomas Wouters40a088d2008-03-18 20:19:54 +0000684 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000685 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000686 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000687
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000688 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000689 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000690 self.assertEqual(re.compile(pattern).split("a.b.c"),
691 ['a','b','c'])
692
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000693 def test_bug_581080(self):
694 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000695 self.assertEqual(next(iter).span(), (1,2))
696 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000697
698 scanner = re.compile(r"\s").scanner("a b")
699 self.assertEqual(scanner.search().span(), (1, 2))
700 self.assertEqual(scanner.search(), None)
701
702 def test_bug_817234(self):
703 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000704 self.assertEqual(next(iter).span(), (0, 4))
705 self.assertEqual(next(iter).span(), (4, 4))
706 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000707
Mark Dickinson1f268282009-07-28 17:22:36 +0000708 def test_bug_6561(self):
709 # '\d' should match characters in Unicode category 'Nd'
710 # (Number, Decimal Digit), but not those in 'Nl' (Number,
711 # Letter) or 'No' (Number, Other).
712 decimal_digits = [
713 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
714 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
715 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
716 ]
717 for x in decimal_digits:
718 self.assertEqual(re.match('^\d$', x).group(0), x)
719
720 not_decimal_digits = [
721 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
722 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
723 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
724 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
725 ]
726 for x in not_decimal_digits:
727 self.assertIsNone(re.match('^\d$', x))
728
Guido van Rossumd8faa362007-04-27 19:54:29 +0000729 def test_empty_array(self):
730 # SF buf 1647541
731 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000732 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000733 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000734 self.assertEqual(re.compile(b"bla").match(a), None)
735 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000736
Christian Heimes072c0f12008-01-03 23:01:04 +0000737 def test_inline_flags(self):
738 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000739 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
740 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000741
742 p = re.compile(upper_char, re.I | re.U)
743 q = p.match(lower_char)
744 self.assertNotEqual(q, None)
745
746 p = re.compile(lower_char, re.I | re.U)
747 q = p.match(upper_char)
748 self.assertNotEqual(q, None)
749
750 p = re.compile('(?i)' + upper_char, re.U)
751 q = p.match(lower_char)
752 self.assertNotEqual(q, None)
753
754 p = re.compile('(?i)' + lower_char, re.U)
755 q = p.match(upper_char)
756 self.assertNotEqual(q, None)
757
758 p = re.compile('(?iu)' + upper_char)
759 q = p.match(lower_char)
760 self.assertNotEqual(q, None)
761
762 p = re.compile('(?iu)' + lower_char)
763 q = p.match(upper_char)
764 self.assertNotEqual(q, None)
765
Christian Heimes25bb7832008-01-11 16:17:00 +0000766 def test_dollar_matches_twice(self):
767 "$ matches the end of string, and just before the terminating \n"
768 pattern = re.compile('$')
769 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
770 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
771 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
772
773 pattern = re.compile('$', re.MULTILINE)
774 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
775 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
776 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
777
Antoine Pitroufd036452008-08-19 17:56:33 +0000778 def test_bytes_str_mixing(self):
779 # Mixing str and bytes is disallowed
780 pat = re.compile('.')
781 bpat = re.compile(b'.')
782 self.assertRaises(TypeError, pat.match, b'b')
783 self.assertRaises(TypeError, bpat.match, 'b')
784 self.assertRaises(TypeError, pat.sub, b'b', 'c')
785 self.assertRaises(TypeError, pat.sub, 'b', b'c')
786 self.assertRaises(TypeError, pat.sub, b'b', b'c')
787 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
788 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
789 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
790
791 def test_ascii_and_unicode_flag(self):
792 # String patterns
793 for flags in (0, re.UNICODE):
794 pat = re.compile('\xc0', flags | re.IGNORECASE)
795 self.assertNotEqual(pat.match('\xe0'), None)
796 pat = re.compile('\w', flags)
797 self.assertNotEqual(pat.match('\xe0'), None)
798 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
799 self.assertEqual(pat.match('\xe0'), None)
800 pat = re.compile('(?a)\xc0', re.IGNORECASE)
801 self.assertEqual(pat.match('\xe0'), None)
802 pat = re.compile('\w', re.ASCII)
803 self.assertEqual(pat.match('\xe0'), None)
804 pat = re.compile('(?a)\w')
805 self.assertEqual(pat.match('\xe0'), None)
806 # Bytes patterns
807 for flags in (0, re.ASCII):
808 pat = re.compile(b'\xc0', re.IGNORECASE)
809 self.assertEqual(pat.match(b'\xe0'), None)
810 pat = re.compile(b'\w')
811 self.assertEqual(pat.match(b'\xe0'), None)
812 # Incompatibilities
813 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
814 self.assertRaises(ValueError, re.compile, b'(?u)\w')
815 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
816 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
817 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
818 self.assertRaises(ValueError, re.compile, '(?au)\w')
819
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000820 def test_bug_6509(self):
821 # Replacement strings of both types must parse properly.
822 # all strings
823 pat = re.compile('a(\w)')
824 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
825 pat = re.compile('a(.)')
826 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
827 pat = re.compile('..')
828 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
829
830 # all bytes
831 pat = re.compile(b'a(\w)')
832 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
833 pat = re.compile(b'a(.)')
834 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
835 pat = re.compile(b'..')
836 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
837
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000838 def test_dealloc(self):
839 # issue 3299: check for segfault in debug build
840 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +0000841 # the overflow limit is different on wide and narrow builds and it
842 # depends on the definition of SRE_CODE (see sre.h).
843 # 2**128 should be big enough to overflow on both. For smaller values
844 # a RuntimeError is raised instead of OverflowError.
845 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000846 self.assertRaises(TypeError, re.finditer, "a", {})
847 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Victor Stinner5abeafb2010-03-04 21:59:53 +0000848 self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000849
Ezio Melottidf723e12012-03-13 01:29:48 +0200850 def test_compile(self):
851 # Test return value when given string and pattern as parameter
852 pattern = re.compile('random pattern')
853 self.assertIsInstance(pattern, re._pattern_type)
854 same_pattern = re.compile(pattern)
855 self.assertIsInstance(same_pattern, re._pattern_type)
856 self.assertIs(same_pattern, pattern)
857 # Test behaviour when not given a string or pattern as parameter
858 self.assertRaises(TypeError, re.compile, 0)
859
Ezio Melottife8e6e72013-01-11 08:32:01 +0200860 def test_bug_13899(self):
861 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
862 # nothing. Ditto B and Z.
863 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
864 ['A', 'B', '\b', 'C', 'Z'])
865
@bigmemtest(size=_2G, memuse=character_size)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # '$' can only match at the very end of a string made of 'a'
    # characters, so both offsets must equal the full length.
    s = 'a' * size
    m = re.search('$', s)
    self.assertIsNotNone(m)
    self.assertEqual(m.start(), size)
    self.assertEqual(m.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100874
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2 * character_size)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # The empty pattern is replaced by the empty string, so the text is
    # unchanged and the substitution count is expected to be size + 1.
    s = 'a' * size
    r, n = re.subn('', '', s)
    self.assertEqual(r, s)
    self.assertEqual(n, size + 1)
884
885
def _group_or_placeholder(match, key):
    # Return match.group(key), substituting the string "None" for a group
    # that did not participate and "Error" for a group that does not
    # exist, so the value can be used in the table's string expressions.
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    return "None" if value is None else value


def run_re_tests():
    # Run the table-driven cases from test.re_tests.  Deviations are
    # reported by printing a '===' / '***' line rather than by raising.
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt is not an Exception subclass, so it
            # propagates on its own with its original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no result to inspect for this case; carrying on
            # would read a stale (or unbound) 'result'.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_or_placeholder(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_or_placeholder(result, name)
            # eval() input is the expression stored in the test table
            # imported above, not external data.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                bytes_result = bpat.search(bs)
                if bytes_result is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        # NOTE(review): newer Python versions reject re.LOCALE with str
        # patterns -- confirm the target version before relying on this.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001012
Gregory P. Smith5a631832010-07-27 05:31:29 +00001013
def test_main():
    # Run the unittest-based cases first, then the table-driven suite.
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001017
# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_main()