blob: 0c95f4e6ae8cb07c7e0e161c5e271d5710b06ba5 [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
2 cpython_only
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00005from re import Scanner
Ezio Melottid2114eb2011-03-25 14:08:44 +02006import sys
7import string
8import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00009from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000010
Antoine Pitrou1f1888e2012-12-03 20:53:12 +010011from test.test_bigmem import character_size
12
13
Guido van Rossum23b22571997-07-17 22:36:14 +000014# Misc tests from Tim Peters' re.doc
15
Just van Rossum6802c6e2003-07-02 14:36:59 +000016# WARNING: Don't change details in these tests if you don't know
Ezio Melotti42da6632011-03-15 05:18:48 +020017# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000018# cover most of the code.
19
Skip Montanaro8ed06da2003-04-24 19:43:18 +000020import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000021
Skip Montanaro8ed06da2003-04-24 19:43:18 +000022class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000023
Benjamin Petersone48944b2012-03-07 14:50:25 -060024 def test_keep_buffer(self):
25 # See bug 14212
26 b = bytearray(b'x')
27 it = re.finditer(b'a', b)
28 with self.assertRaises(BufferError):
29 b.extend(b'x'*400)
30 list(it)
31 del it
32 gc_collect()
33 b.extend(b'x'*400)
34
Raymond Hettinger027bb632004-05-31 03:09:25 +000035 def test_weakref(self):
36 s = 'QabbbcR'
37 x = re.compile('ab+c')
38 y = proxy(x)
39 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
40
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 def test_search_star_plus(self):
42 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
43 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
44 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
45 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000046 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000047 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
48 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
49 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
50 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000051 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000052
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): parse the whole match as an
    # integer and return that integer plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 def test_basic_re_sub(self):
58 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
59 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
60 '9.3 -3 24x100y')
61 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
62 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000063
Skip Montanaro8ed06da2003-04-24 19:43:18 +000064 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
65 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000066
Skip Montanaro8ed06da2003-04-24 19:43:18 +000067 s = r"\1\1"
68 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
69 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
70 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000071
Skip Montanaro8ed06da2003-04-24 19:43:18 +000072 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
73 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
74 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
75 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000076
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
78 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
79 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
80 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
81 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000082
Skip Montanaro8ed06da2003-04-24 19:43:18 +000083 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000084
Skip Montanaro2726fcd2003-04-25 14:31:54 +000085 def test_bug_449964(self):
86 # fails for group followed by other escape
87 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
88 'xx\bxx\b')
89
90 def test_bug_449000(self):
91 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000092 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
93 'abc\ndef\n')
94 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
95 'abc\ndef\n')
96 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
97 'abc\ndef\n')
98 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
99 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +0000100
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000101 def test_bug_1661(self):
102 # Verify that flags do not get silently ignored with compiled patterns
103 pattern = re.compile('.')
104 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
105 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
106 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
107 self.assertRaises(ValueError, re.compile, pattern, re.I)
108
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000109 def test_bug_3629(self):
110 # A regex that triggered a bug in the sre-code validator
111 re.compile("(?P<quote>)(?(quote))")
112
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000113 def test_sub_template_numeric_escape(self):
114 # bug 776311 and friends
115 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
116 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
117 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
118 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
119 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
120 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
121 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
122
123 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
124 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
125
126 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
127 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
128 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
129 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
130 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
131
132 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
133 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000134
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000135 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
136 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
137 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
138 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
139 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
140 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
141 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
142 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
143 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
144 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
145 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
146 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
147
148 # in python2.3 (etc), these loop endlessly in sre_parser.py
149 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
150 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
151 'xz8')
152 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
153 'xza')
154
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000155 def test_qualified_re_sub(self):
156 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
157 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000158
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000159 def test_bug_114660(self):
160 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
161 'hello there')
162
163 def test_bug_462270(self):
164 # Test for empty sub() behaviour, see SF bug #462270
165 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
166 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
167
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200168 def test_symbolic_groups(self):
169 re.compile('(?P<a>x)(?P=a)(?(a)y)')
170 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
171 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
172 self.assertRaises(re.error, re.compile, '(?Px)')
173 self.assertRaises(re.error, re.compile, '(?P=)')
174 self.assertRaises(re.error, re.compile, '(?P=1)')
175 self.assertRaises(re.error, re.compile, '(?P=a)')
176 self.assertRaises(re.error, re.compile, '(?P=a1)')
177 self.assertRaises(re.error, re.compile, '(?P=a.)')
178 self.assertRaises(re.error, re.compile, '(?P<)')
179 self.assertRaises(re.error, re.compile, '(?P<>)')
180 self.assertRaises(re.error, re.compile, '(?P<1>)')
181 self.assertRaises(re.error, re.compile, '(?P<a.>)')
182 self.assertRaises(re.error, re.compile, '(?())')
183 self.assertRaises(re.error, re.compile, '(?(a))')
184 self.assertRaises(re.error, re.compile, '(?(1a))')
185 self.assertRaises(re.error, re.compile, '(?(a.))')
186
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000187 def test_symbolic_refs(self):
188 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
189 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
190 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
191 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200192 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000193 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
194 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
195 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
196 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000197 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000198
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000199 def test_re_subn(self):
200 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
201 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
202 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
203 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
204 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000205
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000206 def test_re_split(self):
207 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
208 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
209 self.assertEqual(re.split("(:*)", ":a:b::c"),
210 ['', ':', 'a', ':', 'b', '::', 'c'])
211 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
212 self.assertEqual(re.split("(:)*", ":a:b::c"),
213 ['', ':', 'a', ':', 'b', ':', 'c'])
214 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
215 ['', ':', 'a', ':b::', 'c'])
216 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
217 ['', None, ':', 'a', None, ':', '', 'b', None, '',
218 None, '::', 'c'])
219 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
220 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000221
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000222 def test_qualified_re_split(self):
223 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
224 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
225 self.assertEqual(re.split("(:)", ":a:b::c", 2),
226 ['', ':', 'a', ':', 'b::c'])
227 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
228 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000229
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000230 def test_re_findall(self):
231 self.assertEqual(re.findall(":+", "abc"), [])
232 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
233 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
234 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
235 (":", ":"),
236 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000237
Skip Montanaro5ba00542003-04-25 16:00:14 +0000238 def test_bug_117612(self):
239 self.assertEqual(re.findall(r"(a|(b))", "aba"),
240 [("a", ""),("b", "b"),("a", "")])
241
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000242 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000243 self.assertEqual(re.match('a', 'a').groups(), ())
244 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
245 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
246 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
247 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000248
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000249 pat = re.compile('((a)|(b))(c)?')
250 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
251 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
252 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
253 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
254 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000255
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000256 # A single group
257 m = re.match('(a)', 'a')
258 self.assertEqual(m.group(0), 'a')
259 self.assertEqual(m.group(0), 'a')
260 self.assertEqual(m.group(1), 'a')
261 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000262
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000263 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
264 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
265 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
266 (None, 'b', None))
267 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000268
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000269 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000270 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
271 ('(', 'a'))
272 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
273 (None, 'a'))
274 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
275 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
276 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
277 ('a', 'b'))
278 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
279 (None, 'd'))
280 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
281 (None, 'd'))
282 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
283 ('a', ''))
284
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000285 # Tests for bug #1177831: exercise groups other than the first group
286 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
287 self.assertEqual(p.match('abc').groups(),
288 ('a', 'b', 'c'))
289 self.assertEqual(p.match('ad').groups(),
290 ('a', None, 'd'))
291 self.assertEqual(p.match('abd'), None)
292 self.assertEqual(p.match('ac'), None)
293
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000294
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000295 def test_re_groupref(self):
296 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
297 ('|', 'a'))
298 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
299 (None, 'a'))
300 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
301 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
302 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
303 ('a', 'a'))
304 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
305 (None, None))
306
307 def test_groupdict(self):
308 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
309 'first second').groupdict(),
310 {'first':'first', 'second':'second'})
311
312 def test_expand(self):
313 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
314 "first second")
315 .expand(r"\2 \1 \g<second> \g<first>"),
316 "second first second first")
317
318 def test_repeat_minmax(self):
319 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
320 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
321 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
322 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
323
324 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
325 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
326 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
327 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
328 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
329 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
330 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
331 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
332
333 self.assertEqual(re.match("^x{1}$", "xxx"), None)
334 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
335 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
336 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
337
338 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
339 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
340 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
341 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
342 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
343 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
344 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
345 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
346
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000347 self.assertEqual(re.match("^x{}$", "xxx"), None)
348 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
349
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000350 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000351 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000352 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000353 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
354 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
355 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
356 {'first': 1, 'other': 2})
357
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000358 self.assertEqual(re.match("(a)", "a").pos, 0)
359 self.assertEqual(re.match("(a)", "a").endpos, 1)
360 self.assertEqual(re.match("(a)", "a").string, "a")
361 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
362 self.assertNotEqual(re.match("(a)", "a").re, None)
363
364 def test_special_escapes(self):
365 self.assertEqual(re.search(r"\b(b.)\b",
366 "abcd abc bcd bx").group(1), "bx")
367 self.assertEqual(re.search(r"\B(b.)\B",
368 "abc bcd bc abxd").group(1), "bx")
369 self.assertEqual(re.search(r"\b(b.)\b",
370 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
371 self.assertEqual(re.search(r"\B(b.)\B",
372 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
373 self.assertEqual(re.search(r"\b(b.)\b",
374 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
375 self.assertEqual(re.search(r"\B(b.)\B",
376 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
377 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
378 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
379 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
380 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000381 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000382 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000383 "abc bcd bc abxd").group(1), "bx")
384 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
385 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
386 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000387 self.assertEqual(re.search(r"\d\D\w\W\s\S",
388 "1aa! a").group(0), "1aa! a")
389 self.assertEqual(re.search(r"\d\D\w\W\s\S",
390 "1aa! a", re.LOCALE).group(0), "1aa! a")
391 self.assertEqual(re.search(r"\d\D\w\W\s\S",
392 "1aa! a", re.UNICODE).group(0), "1aa! a")
393
Ezio Melotti5a045b92012-02-29 11:48:44 +0200394 def test_string_boundaries(self):
395 # See http://bugs.python.org/issue10713
396 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
397 "abc")
398 # There's a word boundary at the start of a string.
399 self.assertTrue(re.match(r"\b", "abc"))
400 # A non-empty string includes a non-boundary zero-length match.
401 self.assertTrue(re.search(r"\B", "abc"))
402 # There is no non-boundary match at the start of a string.
403 self.assertFalse(re.match(r"\B", "abc"))
404 # However, an empty string contains no word boundaries, and also no
405 # non-boundaries.
406 self.assertEqual(re.search(r"\B", ""), None)
407 # This one is questionable and different from the perlre behaviour,
408 # but describes current behavior.
409 self.assertEqual(re.search(r"\b", ""), None)
410 # A single word-character string has two boundaries, but no
411 # non-boundary gaps.
412 self.assertEqual(len(re.findall(r"\b", "a")), 2)
413 self.assertEqual(len(re.findall(r"\B", "a")), 0)
414 # If there are no words, there are no boundaries
415 self.assertEqual(len(re.findall(r"\b", " ")), 0)
416 self.assertEqual(len(re.findall(r"\b", " ")), 0)
417 # Can match around the whitespace.
418 self.assertEqual(len(re.findall(r"\B", " ")), 2)
419
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000420 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000421 self.assertEqual(re.match("([\u2222\u2223])",
422 "\u2222").group(1), "\u2222")
423 self.assertEqual(re.match("([\u2222\u2223])",
424 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000425
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100426 def test_big_codesize(self):
427 # Issue #1160
428 r = re.compile('|'.join(('%d'%x for x in range(10000))))
429 self.assertIsNotNone(r.match('1000'))
430 self.assertIsNotNone(r.match('9999'))
431
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000432 def test_anyall(self):
433 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
434 "a\nb")
435 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
436 "a\n\nb")
437
438 def test_non_consuming(self):
439 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
440 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
441 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
442 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
443 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
444 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
445 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
446
447 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
448 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
449 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
450 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
451
452 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000453 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
454 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000455 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
456 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
457 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
458 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
459 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
460 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
461 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
462 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
463
464 def test_category(self):
465 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
466
467 def test_getlower(self):
468 import _sre
469 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
470 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
471 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
472
473 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000474 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000475
476 def test_not_literal(self):
477 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
478 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
479
480 def test_search_coverage(self):
481 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
482 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
483
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the match has
    # the expected text and span.
    #
    # match and span must be given together.  When both are omitted the
    # pattern is expected to match the whole of text.  Supplying only one
    # of them raises ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000497
Ezio Melottid2114eb2011-03-25 14:08:44 +0200498 def test_re_escape(self):
499 alnum_chars = string.ascii_letters + string.digits
500 p = ''.join(chr(i) for i in range(256))
501 for c in p:
502 if c in alnum_chars:
503 self.assertEqual(re.escape(c), c)
504 elif c == '\x00':
505 self.assertEqual(re.escape(c), '\\000')
506 else:
507 self.assertEqual(re.escape(c), '\\' + c)
508 self.assertMatch(re.escape(c), c)
509 self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000510
Guido van Rossum698280d2008-09-10 17:44:35 +0000511 def test_re_escape_byte(self):
Ezio Melottid2114eb2011-03-25 14:08:44 +0200512 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
513 p = bytes(range(256))
514 for i in p:
Guido van Rossum698280d2008-09-10 17:44:35 +0000515 b = bytes([i])
Ezio Melottid2114eb2011-03-25 14:08:44 +0200516 if b in alnum_chars:
517 self.assertEqual(re.escape(b), b)
518 elif i == 0:
519 self.assertEqual(re.escape(b), b'\\000')
520 else:
521 self.assertEqual(re.escape(b), b'\\' + b)
522 self.assertMatch(re.escape(b), b)
523 self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000524
Ezio Melotti7b9e97b2011-03-25 14:09:33 +0200525 def test_re_escape_non_ascii(self):
526 s = 'xxx\u2620\u2620\u2620xxx'
527 s_escaped = re.escape(s)
528 self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
529 self.assertMatch(s_escaped, s)
530 self.assertMatch('.%s+.' % re.escape('\u2620'), s,
531 'x\u2620\u2620\u2620x', (2, 7), re.search)
532
533 def test_re_escape_non_ascii_bytes(self):
534 b = 'y\u2620y\u2620y'.encode('utf-8')
535 b_escaped = re.escape(b)
536 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
537 self.assertMatch(b_escaped, b)
538 res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
539 self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000540
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle module's
    # dumps()/loads() and check that the result equals the original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000546
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000547 def test_constants(self):
548 self.assertEqual(re.I, re.IGNORECASE)
549 self.assertEqual(re.L, re.LOCALE)
550 self.assertEqual(re.M, re.MULTILINE)
551 self.assertEqual(re.S, re.DOTALL)
552 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000553
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000554 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000555 for flag in [re.I, re.M, re.X, re.S, re.L]:
556 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000557
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000558 def test_sre_character_literals(self):
559 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
560 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
561 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
562 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
563 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
564 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
565 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
566 self.assertRaises(re.error, re.match, "\911", "")
567
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000568 def test_sre_character_class_literals(self):
569 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
570 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
571 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
572 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
573 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
574 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
575 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
576 self.assertRaises(re.error, re.match, "[\911]", "")
577
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000578 def test_bug_113254(self):
579 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
580 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
581 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
582
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000583 def test_bug_527371(self):
584 # bug described in patches 527371/672491
585 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
586 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
587 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
588 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
589 self.assertEqual(re.match("((a))", "a").lastindex, 1)
590
591 def test_bug_545855(self):
592 # bug 545855 -- This pattern failed to cause a compile error as it
593 # should, instead provoking a TypeError.
594 self.assertRaises(re.error, re.compile, 'foo[a-')
595
596 def test_bug_418626(self):
597 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
598 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
599 # pattern '*?' on a long string.
600 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
601 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
602 20003)
603 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000604 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000605 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000606 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000607
608 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000609 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000610 self.assertEqual(re.compile(pat) and 1, 1)
611
Skip Montanaro1e703c62003-04-25 15:40:28 +0000612 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000613 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000614 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000615 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
616 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
617 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000618
Serhiy Storchakafa468162013-02-16 21:23:53 +0200619 def test_unlimited_zero_width_repeat(self):
620 # Issue #9669
621 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
622 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
623 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
624 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
625 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
626 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
627
Skip Montanaro1e703c62003-04-25 15:40:28 +0000628 def test_scanner(self):
629 def s_ident(scanner, token): return token
630 def s_operator(scanner, token): return "op%s" % token
631 def s_float(scanner, token): return float(token)
632 def s_int(scanner, token): return int(token)
633
634 scanner = Scanner([
635 (r"[a-zA-Z_]\w*", s_ident),
636 (r"\d+\.\d*", s_float),
637 (r"\d+", s_int),
638 (r"=|\+|-|\*|/", s_operator),
639 (r"\s+", None),
640 ])
641
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000642 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
643
Skip Montanaro1e703c62003-04-25 15:40:28 +0000644 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
645 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
646 'op+', 'bar'], ''))
647
Skip Montanaro5ba00542003-04-25 16:00:14 +0000648 def test_bug_448951(self):
649 # bug 448951 (similar to 429357, but with single char match)
650 # (Also test greedy matches.)
651 for op in '','?','*':
652 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
653 (None, None))
654 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
655 ('a:', 'a'))
656
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000657 def test_bug_725106(self):
658 # capturing groups in alternatives in repeats
659 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
660 ('b', 'a'))
661 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
662 ('c', 'b'))
663 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
664 ('b', None))
665 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
666 ('b', None))
667 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
668 ('b', 'a'))
669 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
670 ('c', 'b'))
671 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
672 ('b', None))
673 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
674 ('b', None))
675
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000676 def test_bug_725149(self):
677 # mark_stack_base restoring before restoring marks
678 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
679 ('a', None))
680 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
681 ('a', None, None))
682
Just van Rossum12723ba2003-07-02 20:03:04 +0000683 def test_bug_764548(self):
684 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000685 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000686 pat = re.compile(my_unicode("abc"))
687 self.assertEqual(pat.match("xyz"), None)
688
Skip Montanaro5ba00542003-04-25 16:00:14 +0000689 def test_finditer(self):
690 iter = re.finditer(r":+", "a:b::c:::d")
691 self.assertEqual([item.group(0) for item in iter],
692 [":", "::", ":::"])
693
Thomas Wouters40a088d2008-03-18 20:19:54 +0000694 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000695 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000696 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000697
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000698 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000699 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000700 self.assertEqual(re.compile(pattern).split("a.b.c"),
701 ['a','b','c'])
702
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000703 def test_bug_581080(self):
704 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000705 self.assertEqual(next(iter).span(), (1,2))
706 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000707
708 scanner = re.compile(r"\s").scanner("a b")
709 self.assertEqual(scanner.search().span(), (1, 2))
710 self.assertEqual(scanner.search(), None)
711
712 def test_bug_817234(self):
713 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000714 self.assertEqual(next(iter).span(), (0, 4))
715 self.assertEqual(next(iter).span(), (4, 4))
716 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000717
Mark Dickinson1f268282009-07-28 17:22:36 +0000718 def test_bug_6561(self):
719 # '\d' should match characters in Unicode category 'Nd'
720 # (Number, Decimal Digit), but not those in 'Nl' (Number,
721 # Letter) or 'No' (Number, Other).
722 decimal_digits = [
723 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
724 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
725 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
726 ]
727 for x in decimal_digits:
728 self.assertEqual(re.match('^\d$', x).group(0), x)
729
730 not_decimal_digits = [
731 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
732 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
733 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
734 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
735 ]
736 for x in not_decimal_digits:
737 self.assertIsNone(re.match('^\d$', x))
738
Guido van Rossumd8faa362007-04-27 19:54:29 +0000739 def test_empty_array(self):
740 # SF buf 1647541
741 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000742 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000743 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000744 self.assertEqual(re.compile(b"bla").match(a), None)
745 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000746
Christian Heimes072c0f12008-01-03 23:01:04 +0000747 def test_inline_flags(self):
748 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000749 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
750 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000751
752 p = re.compile(upper_char, re.I | re.U)
753 q = p.match(lower_char)
754 self.assertNotEqual(q, None)
755
756 p = re.compile(lower_char, re.I | re.U)
757 q = p.match(upper_char)
758 self.assertNotEqual(q, None)
759
760 p = re.compile('(?i)' + upper_char, re.U)
761 q = p.match(lower_char)
762 self.assertNotEqual(q, None)
763
764 p = re.compile('(?i)' + lower_char, re.U)
765 q = p.match(upper_char)
766 self.assertNotEqual(q, None)
767
768 p = re.compile('(?iu)' + upper_char)
769 q = p.match(lower_char)
770 self.assertNotEqual(q, None)
771
772 p = re.compile('(?iu)' + lower_char)
773 q = p.match(upper_char)
774 self.assertNotEqual(q, None)
775
Christian Heimes25bb7832008-01-11 16:17:00 +0000776 def test_dollar_matches_twice(self):
777 "$ matches the end of string, and just before the terminating \n"
778 pattern = re.compile('$')
779 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
780 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
781 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
782
783 pattern = re.compile('$', re.MULTILINE)
784 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
785 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
786 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
787
Antoine Pitroufd036452008-08-19 17:56:33 +0000788 def test_bytes_str_mixing(self):
789 # Mixing str and bytes is disallowed
790 pat = re.compile('.')
791 bpat = re.compile(b'.')
792 self.assertRaises(TypeError, pat.match, b'b')
793 self.assertRaises(TypeError, bpat.match, 'b')
794 self.assertRaises(TypeError, pat.sub, b'b', 'c')
795 self.assertRaises(TypeError, pat.sub, 'b', b'c')
796 self.assertRaises(TypeError, pat.sub, b'b', b'c')
797 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
798 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
799 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
800
801 def test_ascii_and_unicode_flag(self):
802 # String patterns
803 for flags in (0, re.UNICODE):
804 pat = re.compile('\xc0', flags | re.IGNORECASE)
805 self.assertNotEqual(pat.match('\xe0'), None)
806 pat = re.compile('\w', flags)
807 self.assertNotEqual(pat.match('\xe0'), None)
808 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
809 self.assertEqual(pat.match('\xe0'), None)
810 pat = re.compile('(?a)\xc0', re.IGNORECASE)
811 self.assertEqual(pat.match('\xe0'), None)
812 pat = re.compile('\w', re.ASCII)
813 self.assertEqual(pat.match('\xe0'), None)
814 pat = re.compile('(?a)\w')
815 self.assertEqual(pat.match('\xe0'), None)
816 # Bytes patterns
817 for flags in (0, re.ASCII):
818 pat = re.compile(b'\xc0', re.IGNORECASE)
819 self.assertEqual(pat.match(b'\xe0'), None)
820 pat = re.compile(b'\w')
821 self.assertEqual(pat.match(b'\xe0'), None)
822 # Incompatibilities
823 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
824 self.assertRaises(ValueError, re.compile, b'(?u)\w')
825 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
826 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
827 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
828 self.assertRaises(ValueError, re.compile, '(?au)\w')
829
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000830 def test_bug_6509(self):
831 # Replacement strings of both types must parse properly.
832 # all strings
833 pat = re.compile('a(\w)')
834 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
835 pat = re.compile('a(.)')
836 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
837 pat = re.compile('..')
838 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
839
840 # all bytes
841 pat = re.compile(b'a(\w)')
842 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
843 pat = re.compile(b'a(.)')
844 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
845 pat = re.compile(b'..')
846 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
847
def test_dealloc(self):
    """Issue 3299: bad arguments must raise cleanly, not crash.

    The calls below pass invalid arguments to re.finditer() and to the
    private _sre.compile() and only check the exception type raised.
    """
    # issue 3299: check for segfault in debug build
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both. For smaller values
    # a RuntimeError is raised instead of OverflowError.
    long_overflow = 2**128
    # A dict is not a valid subject string.
    self.assertRaises(TypeError, re.finditer, "a", {})
    # A code list holding a value too large for SRE_CODE.
    self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
    # A dict is not a valid pattern object either.
    self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000859
Ezio Melottidf723e12012-03-13 01:29:48 +0200860 def test_compile(self):
861 # Test return value when given string and pattern as parameter
862 pattern = re.compile('random pattern')
863 self.assertIsInstance(pattern, re._pattern_type)
864 same_pattern = re.compile(pattern)
865 self.assertIsInstance(same_pattern, re._pattern_type)
866 self.assertIs(same_pattern, pattern)
867 # Test behaviour when not given a string or pattern as parameter
868 self.assertRaises(TypeError, re.compile, 0)
869
def test_bug_13899(self):
    r"""Issue #13899: escaped letters inside a character class are literals.

    The subject is built from a non-raw literal, so its '\b' is a
    backspace character; the class is expected to find it along with the
    letters A, B, C and Z.
    """
    # Issue #13899: re pattern r"[\A]" should work like "A" but matches
    # nothing. Ditto B and Z.
    self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
                     ['A', 'B', '\b', 'C', 'Z'])
875
@bigmemtest(size=_2G, memuse=character_size)
def test_large_search(self, size):
    """Issue #10182: match indices were truncated to 32 bits."""
    haystack = 'a' * size
    end_match = re.search('$', haystack)
    self.assertIsNotNone(end_match)
    # '$' can only match at the very end of a string with no newline.
    self.assertEqual(end_match.start(), size)
    self.assertEqual(end_match.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100884
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2 * character_size)
def test_large_subn(self, size):
    """Issue #10182: substitution counts were truncated to 32 bits."""
    original = 'a' * size
    replaced, count = re.subn('', '', original)
    self.assertEqual(replaced, original)
    # The empty pattern matches before every character and at the end.
    self.assertEqual(count, size + 1)
894
895
Serhiy Storchaka70ca0212013-02-16 16:47:47 +0200896 def test_repeat_minmax_overflow(self):
897 # Issue #13169
898 string = "x" * 100000
899 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
900 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
901 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
902 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
903 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
904 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
905 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
906 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
907 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
908 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
909 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
910
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    """Repeat counts just below, and exactly at, _sre.MAXREPEAT."""
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    subject = "x" * 100000
    below_limit = MAXREPEAT - 1

    # One below the limit compiles; whether it matches depends on how
    # many characters the subject offers.
    self.assertIsNone(re.match(r".{%d}" % below_limit, subject))
    at_most = re.match(r".{,%d}" % below_limit, subject)
    self.assertEqual(at_most.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % below_limit, subject))

    # The limit itself is rejected at compile time.
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
925
926
def _group_text(match, key):
    """Return group *key* of *match* in a form the repl expressions can use.

    An unmatched group becomes the string "None" (so string concatenation
    in the expressions does not fail) and a non-existent group becomes
    "Error".
    """
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    return "None" if value is None else value


def run_re_tests():
    """Run the table-driven cases from test.re_tests, printing any failures.

    Each entry of ``tests`` is ``(pattern, string, outcome)`` or
    ``(pattern, string, outcome, repl, expected)``.  Mismatches are reported
    with print() instead of being raised, so one bad entry does not stop
    the run.

    Raises:
        ValueError: if an entry has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            # A compile error is fine only when the entry expects one.
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit propagate with their original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # No result exists for this entry; checking one left over
            # from a previous iteration would be meaningless.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_text(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_text(result, name)
            # NOTE: eval() is applied to expressions taken from the
            # test.re_tests table; never feed it external input.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                bytes_result = bpat.search(bs)
                if bytes_result is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds. \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                and result is not None:
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001053
Gregory P. Smith5a631832010-07-27 05:31:29 +00001054
def test_main():
    """Run the ReTests unittest cases, then the table-driven re_tests."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001058
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()