blob: ef19164ed818e90f7a6f5ea2bbed7efc29bd15ff [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
2 cpython_only
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00005from re import Scanner
Ezio Melottid2114eb2011-03-25 14:08:44 +02006import sys
7import string
8import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00009from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000010
Guido van Rossum23b22571997-07-17 22:36:14 +000011# Misc tests from Tim Peters' re.doc
12
Just van Rossum6802c6e2003-07-02 14:36:59 +000013# WARNING: Don't change details in these tests if you don't know
Ezio Melotti42da6632011-03-15 05:18:48 +020014# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000015# cover most of the code.
16
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000018
Skip Montanaro8ed06da2003-04-24 19:43:18 +000019class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000020
def test_keep_buffer(self):
    # See bug 14212: while a finditer() iterator over a bytearray is
    # still alive, resizing that bytearray must raise BufferError.
    b = bytearray(b'x')
    it = re.finditer(b'a', b)
    with self.assertRaises(BufferError):
        b.extend(b'x'*400)
    # Exhaust and drop the iterator, then collect garbage; after that the
    # same resize is performed without expecting an error.
    list(it)
    del it
    gc_collect()
    b.extend(b'x'*400)
31
Raymond Hettinger027bb632004-05-31 03:09:25 +000032 def test_weakref(self):
33 s = 'QabbbcR'
34 x = re.compile('ab+c')
35 y = proxy(x)
36 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
37
Skip Montanaro8ed06da2003-04-24 19:43:18 +000038 def test_search_star_plus(self):
39 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
40 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
41 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
42 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000043 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000044 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
45 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
46 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
47 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000048 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000049
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): read the whole match as a
    # decimal integer and return that value plus one, as a string.
    int_value = int(matchobj.group(0))
    return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000053
Skip Montanaro8ed06da2003-04-24 19:43:18 +000054 def test_basic_re_sub(self):
55 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
56 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
57 '9.3 -3 24x100y')
58 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
59 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000060
Skip Montanaro8ed06da2003-04-24 19:43:18 +000061 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
62 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000063
Skip Montanaro8ed06da2003-04-24 19:43:18 +000064 s = r"\1\1"
65 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
66 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
67 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000068
Skip Montanaro8ed06da2003-04-24 19:43:18 +000069 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
70 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
71 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
72 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000073
Skip Montanaro8ed06da2003-04-24 19:43:18 +000074 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
75 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
76 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
77 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
78 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000079
Skip Montanaro8ed06da2003-04-24 19:43:18 +000080 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000081
Skip Montanaro2726fcd2003-04-25 14:31:54 +000082 def test_bug_449964(self):
83 # fails for group followed by other escape
84 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
85 'xx\bxx\b')
86
87 def test_bug_449000(self):
88 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000089 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
90 'abc\ndef\n')
91 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
92 'abc\ndef\n')
93 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
94 'abc\ndef\n')
95 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
96 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000097
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000098 def test_bug_1661(self):
99 # Verify that flags do not get silently ignored with compiled patterns
100 pattern = re.compile('.')
101 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
102 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
103 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
104 self.assertRaises(ValueError, re.compile, pattern, re.I)
105
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000106 def test_bug_3629(self):
107 # A regex that triggered a bug in the sre-code validator
108 re.compile("(?P<quote>)(?(quote))")
109
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000110 def test_sub_template_numeric_escape(self):
111 # bug 776311 and friends
112 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
113 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
114 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
115 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
116 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
117 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
118 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
119
120 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
121 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
122
123 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
124 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
125 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
126 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
127 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
128
129 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
130 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000131
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000132 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
133 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
134 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
135 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
136 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
137 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
138 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
139 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
140 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
141 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
142 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
143 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
144
145 # in python2.3 (etc), these loop endlessly in sre_parser.py
146 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
147 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
148 'xz8')
149 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
150 'xza')
151
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000152 def test_qualified_re_sub(self):
153 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
154 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000155
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000156 def test_bug_114660(self):
157 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
158 'hello there')
159
160 def test_bug_462270(self):
161 # Test for empty sub() behaviour, see SF bug #462270
162 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
163 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
164
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200165 def test_symbolic_groups(self):
166 re.compile('(?P<a>x)(?P=a)(?(a)y)')
167 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
168 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
169 self.assertRaises(re.error, re.compile, '(?Px)')
170 self.assertRaises(re.error, re.compile, '(?P=)')
171 self.assertRaises(re.error, re.compile, '(?P=1)')
172 self.assertRaises(re.error, re.compile, '(?P=a)')
173 self.assertRaises(re.error, re.compile, '(?P=a1)')
174 self.assertRaises(re.error, re.compile, '(?P=a.)')
175 self.assertRaises(re.error, re.compile, '(?P<)')
176 self.assertRaises(re.error, re.compile, '(?P<>)')
177 self.assertRaises(re.error, re.compile, '(?P<1>)')
178 self.assertRaises(re.error, re.compile, '(?P<a.>)')
179 self.assertRaises(re.error, re.compile, '(?())')
180 self.assertRaises(re.error, re.compile, '(?(a))')
181 self.assertRaises(re.error, re.compile, '(?(1a))')
182 self.assertRaises(re.error, re.compile, '(?(a.))')
183
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000184 def test_symbolic_refs(self):
185 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
186 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
187 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
188 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200189 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000190 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
191 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
192 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
193 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000194 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000195
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000196 def test_re_subn(self):
197 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
198 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
199 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
200 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
201 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000202
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000203 def test_re_split(self):
204 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
205 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
206 self.assertEqual(re.split("(:*)", ":a:b::c"),
207 ['', ':', 'a', ':', 'b', '::', 'c'])
208 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
209 self.assertEqual(re.split("(:)*", ":a:b::c"),
210 ['', ':', 'a', ':', 'b', ':', 'c'])
211 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
212 ['', ':', 'a', ':b::', 'c'])
213 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
214 ['', None, ':', 'a', None, ':', '', 'b', None, '',
215 None, '::', 'c'])
216 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
217 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000218
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000219 def test_qualified_re_split(self):
220 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
221 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
222 self.assertEqual(re.split("(:)", ":a:b::c", 2),
223 ['', ':', 'a', ':', 'b::c'])
224 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
225 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000226
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000227 def test_re_findall(self):
228 self.assertEqual(re.findall(":+", "abc"), [])
229 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
230 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
231 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
232 (":", ":"),
233 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000234
Skip Montanaro5ba00542003-04-25 16:00:14 +0000235 def test_bug_117612(self):
236 self.assertEqual(re.findall(r"(a|(b))", "aba"),
237 [("a", ""),("b", "b"),("a", "")])
238
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000239 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000240 self.assertEqual(re.match('a', 'a').groups(), ())
241 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
242 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
243 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
244 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000245
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000246 pat = re.compile('((a)|(b))(c)?')
247 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
248 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
249 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
250 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
251 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000252
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000253 # A single group
254 m = re.match('(a)', 'a')
255 self.assertEqual(m.group(0), 'a')
256 self.assertEqual(m.group(0), 'a')
257 self.assertEqual(m.group(1), 'a')
258 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000259
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000260 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
261 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
262 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
263 (None, 'b', None))
264 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000265
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000266 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000267 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
268 ('(', 'a'))
269 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
270 (None, 'a'))
271 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
272 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
273 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
274 ('a', 'b'))
275 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
276 (None, 'd'))
277 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
278 (None, 'd'))
279 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
280 ('a', ''))
281
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000282 # Tests for bug #1177831: exercise groups other than the first group
283 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
284 self.assertEqual(p.match('abc').groups(),
285 ('a', 'b', 'c'))
286 self.assertEqual(p.match('ad').groups(),
287 ('a', None, 'd'))
288 self.assertEqual(p.match('abd'), None)
289 self.assertEqual(p.match('ac'), None)
290
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000291
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000292 def test_re_groupref(self):
293 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
294 ('|', 'a'))
295 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
296 (None, 'a'))
297 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
298 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
299 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
300 ('a', 'a'))
301 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
302 (None, None))
303
304 def test_groupdict(self):
305 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
306 'first second').groupdict(),
307 {'first':'first', 'second':'second'})
308
309 def test_expand(self):
310 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
311 "first second")
312 .expand(r"\2 \1 \g<second> \g<first>"),
313 "second first second first")
314
315 def test_repeat_minmax(self):
316 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
317 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
318 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
319 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
320
321 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
322 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
323 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
324 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
325 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
326 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
327 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
328 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
329
330 self.assertEqual(re.match("^x{1}$", "xxx"), None)
331 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
332 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
333 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
334
335 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
336 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
337 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
338 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
339 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
340 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
341 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
342 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
343
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000344 self.assertEqual(re.match("^x{}$", "xxx"), None)
345 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
346
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000347 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000348 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000349 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000350 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
351 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
352 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
353 {'first': 1, 'other': 2})
354
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000355 self.assertEqual(re.match("(a)", "a").pos, 0)
356 self.assertEqual(re.match("(a)", "a").endpos, 1)
357 self.assertEqual(re.match("(a)", "a").string, "a")
358 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
359 self.assertNotEqual(re.match("(a)", "a").re, None)
360
361 def test_special_escapes(self):
362 self.assertEqual(re.search(r"\b(b.)\b",
363 "abcd abc bcd bx").group(1), "bx")
364 self.assertEqual(re.search(r"\B(b.)\B",
365 "abc bcd bc abxd").group(1), "bx")
366 self.assertEqual(re.search(r"\b(b.)\b",
367 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
368 self.assertEqual(re.search(r"\B(b.)\B",
369 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
370 self.assertEqual(re.search(r"\b(b.)\b",
371 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
372 self.assertEqual(re.search(r"\B(b.)\B",
373 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
374 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
375 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
376 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
377 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000378 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000379 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000380 "abc bcd bc abxd").group(1), "bx")
381 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
382 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
383 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000384 self.assertEqual(re.search(r"\d\D\w\W\s\S",
385 "1aa! a").group(0), "1aa! a")
386 self.assertEqual(re.search(r"\d\D\w\W\s\S",
387 "1aa! a", re.LOCALE).group(0), "1aa! a")
388 self.assertEqual(re.search(r"\d\D\w\W\s\S",
389 "1aa! a", re.UNICODE).group(0), "1aa! a")
390
Ezio Melotti5a045b92012-02-29 11:48:44 +0200391 def test_string_boundaries(self):
392 # See http://bugs.python.org/issue10713
393 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
394 "abc")
395 # There's a word boundary at the start of a string.
396 self.assertTrue(re.match(r"\b", "abc"))
397 # A non-empty string includes a non-boundary zero-length match.
398 self.assertTrue(re.search(r"\B", "abc"))
399 # There is no non-boundary match at the start of a string.
400 self.assertFalse(re.match(r"\B", "abc"))
401 # However, an empty string contains no word boundaries, and also no
402 # non-boundaries.
403 self.assertEqual(re.search(r"\B", ""), None)
404 # This one is questionable and different from the perlre behaviour,
405 # but describes current behavior.
406 self.assertEqual(re.search(r"\b", ""), None)
407 # A single word-character string has two boundaries, but no
408 # non-boundary gaps.
409 self.assertEqual(len(re.findall(r"\b", "a")), 2)
410 self.assertEqual(len(re.findall(r"\B", "a")), 0)
411 # If there are no words, there are no boundaries
412 self.assertEqual(len(re.findall(r"\b", " ")), 0)
413 self.assertEqual(len(re.findall(r"\b", " ")), 0)
414 # Can match around the whitespace.
415 self.assertEqual(len(re.findall(r"\B", " ")), 2)
416
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000417 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000418 self.assertEqual(re.match("([\u2222\u2223])",
419 "\u2222").group(1), "\u2222")
420 self.assertEqual(re.match("([\u2222\u2223])",
421 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000422
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100423 def test_big_codesize(self):
424 # Issue #1160
425 r = re.compile('|'.join(('%d'%x for x in range(10000))))
426 self.assertIsNotNone(r.match('1000'))
427 self.assertIsNotNone(r.match('9999'))
428
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000429 def test_anyall(self):
430 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
431 "a\nb")
432 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
433 "a\n\nb")
434
435 def test_non_consuming(self):
436 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
437 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
438 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
439 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
440 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
441 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
442 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
443
444 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
445 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
446 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
447 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
448
449 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000450 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
451 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000452 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
453 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
454 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
455 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
456 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
457 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
458 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
459 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
460
461 def test_category(self):
462 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
463
464 def test_getlower(self):
465 import _sre
466 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
467 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
468 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
469
470 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000471 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000472
473 def test_not_literal(self):
474 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
475 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
476
477 def test_search_coverage(self):
478 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
479 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
480
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the match has
    # the expected text and span.
    #
    # match and span must be given together.  When both are omitted the
    # pattern is expected to match the whole of text.  Supplying only
    # one of them raises ValueError.
    if match is None and span is None:
        # the pattern matches the whole text
        match = text
        span = (0, len(text))
    elif match is None or span is None:
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    m = matcher(pattern, text)
    self.assertTrue(m)
    self.assertEqual(m.group(), match)
    self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000494
def test_re_escape(self):
    # For every character in range(256): ASCII letters, digits and '_'
    # are left alone, NUL becomes '\000', and anything else gets a
    # backslash prefix.  The escaped form must match the original text.
    alnum_chars = string.ascii_letters + string.digits + '_'
    p = ''.join(chr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            self.assertEqual(re.escape(c), c)
        elif c == '\x00':
            self.assertEqual(re.escape(c), '\\000')
        else:
            self.assertEqual(re.escape(c), '\\' + c)
        self.assertMatch(re.escape(c), c)
    self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000507
def test_re_escape_byte(self):
    # Bytes counterpart of test_re_escape: the same escaping rules are
    # checked for every single-byte value and for the whole 256-byte
    # string.
    alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
    p = bytes(range(256))
    for i in p:
        # Iterating bytes yields ints; rebuild a length-1 bytes object.
        b = bytes([i])
        if b in alnum_chars:
            self.assertEqual(re.escape(b), b)
        elif i == 0:
            self.assertEqual(re.escape(b), b'\\000')
        else:
            self.assertEqual(re.escape(b), b'\\' + b)
        self.assertMatch(re.escape(b), b)
    self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000521
def test_re_escape_non_ascii(self):
    # Each non-ASCII character is expected to be escaped with a single
    # backslash, and the escaped pattern must still match the original.
    s = 'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    self.assertMatch('.%s+.' % re.escape('\u2620'), s,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)
529
def test_re_escape_non_ascii_bytes(self):
    # In a UTF-8 encoded bytes string every non-ASCII byte is expected
    # to be escaped individually.
    b = 'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    # The escaped 3-byte sequence occurs twice in b.
    res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
    self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000537
def pickle_test(self, pickle):
    # Helper (not collected as a test): round-trip a compiled pattern
    # through the dumps()/loads() of the given pickle-like module and
    # check that the result compares equal to the original.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    s = pickle.dumps(oldpat)
    newpat = pickle.loads(s)
    self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000543
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000544 def test_constants(self):
545 self.assertEqual(re.I, re.IGNORECASE)
546 self.assertEqual(re.L, re.LOCALE)
547 self.assertEqual(re.M, re.MULTILINE)
548 self.assertEqual(re.S, re.DOTALL)
549 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000550
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000551 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000552 for flag in [re.I, re.M, re.X, re.S, re.L]:
553 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000554
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000555 def test_sre_character_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200556 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
557 if i < 256:
558 self.assertIsNotNone(re.match(r"\%03o" % i, chr(i)))
559 self.assertIsNotNone(re.match(r"\%03o0" % i, chr(i)+"0"))
560 self.assertIsNotNone(re.match(r"\%03o8" % i, chr(i)+"8"))
561 self.assertIsNotNone(re.match(r"\x%02x" % i, chr(i)))
562 self.assertIsNotNone(re.match(r"\x%02x0" % i, chr(i)+"0"))
563 self.assertIsNotNone(re.match(r"\x%02xz" % i, chr(i)+"z"))
564 if i < 0x10000:
565 self.assertIsNotNone(re.match(r"\u%04x" % i, chr(i)))
566 self.assertIsNotNone(re.match(r"\u%04x0" % i, chr(i)+"0"))
567 self.assertIsNotNone(re.match(r"\u%04xz" % i, chr(i)+"z"))
568 self.assertIsNotNone(re.match(r"\U%08x" % i, chr(i)))
569 self.assertIsNotNone(re.match(r"\U%08x0" % i, chr(i)+"0"))
570 self.assertIsNotNone(re.match(r"\U%08xz" % i, chr(i)+"z"))
571 self.assertIsNotNone(re.match(r"\0", "\000"))
572 self.assertIsNotNone(re.match(r"\08", "\0008"))
573 self.assertIsNotNone(re.match(r"\01", "\001"))
574 self.assertIsNotNone(re.match(r"\018", "\0018"))
575 self.assertIsNotNone(re.match(r"\567", chr(0o167)))
576 self.assertRaises(re.error, re.match, r"\911", "")
577 self.assertRaises(re.error, re.match, r"\x1", "")
578 self.assertRaises(re.error, re.match, r"\x1z", "")
579 self.assertRaises(re.error, re.match, r"\u123", "")
580 self.assertRaises(re.error, re.match, r"\u123z", "")
581 self.assertRaises(re.error, re.match, r"\U0001234", "")
582 self.assertRaises(re.error, re.match, r"\U0001234z", "")
583 self.assertRaises(re.error, re.match, r"\U00110000", "")
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000584
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000585 def test_sre_character_class_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200586 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
587 if i < 256:
588 self.assertIsNotNone(re.match(r"[\%o]" % i, chr(i)))
589 self.assertIsNotNone(re.match(r"[\%o8]" % i, chr(i)))
590 self.assertIsNotNone(re.match(r"[\%03o]" % i, chr(i)))
591 self.assertIsNotNone(re.match(r"[\%03o0]" % i, chr(i)))
592 self.assertIsNotNone(re.match(r"[\%03o8]" % i, chr(i)))
593 self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
594 self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
595 self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
596 if i < 0x10000:
597 self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
598 self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
599 self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
600 self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
601 self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
602 self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
Ezio Melottieadece22013-02-23 08:40:07 +0200603 self.assertIsNotNone(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200604 self.assertRaises(re.error, re.match, r"[\911]", "")
605 self.assertRaises(re.error, re.match, r"[\x1z]", "")
606 self.assertRaises(re.error, re.match, r"[\u123z]", "")
607 self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
608 self.assertRaises(re.error, re.match, r"[\U00110000]", "")
609
610 def test_sre_byte_literals(self):
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000611 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Antoine Pitrou463badf2012-06-23 13:29:19 +0200612 self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
613 self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
614 self.assertIsNotNone(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
615 self.assertIsNotNone(re.match((r"\x%02x" % i).encode(), bytes([i])))
616 self.assertIsNotNone(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
617 self.assertIsNotNone(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
618 self.assertIsNotNone(re.match(br"\u", b'u'))
619 self.assertIsNotNone(re.match(br"\U", b'U'))
620 self.assertIsNotNone(re.match(br"\0", b"\000"))
621 self.assertIsNotNone(re.match(br"\08", b"\0008"))
622 self.assertIsNotNone(re.match(br"\01", b"\001"))
623 self.assertIsNotNone(re.match(br"\018", b"\0018"))
624 self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
625 self.assertRaises(re.error, re.match, br"\911", b"")
626 self.assertRaises(re.error, re.match, br"\x1", b"")
627 self.assertRaises(re.error, re.match, br"\x1z", b"")
628
629 def test_sre_byte_class_literals(self):
630 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
631 self.assertIsNotNone(re.match((r"[\%o]" % i).encode(), bytes([i])))
632 self.assertIsNotNone(re.match((r"[\%o8]" % i).encode(), bytes([i])))
633 self.assertIsNotNone(re.match((r"[\%03o]" % i).encode(), bytes([i])))
634 self.assertIsNotNone(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
635 self.assertIsNotNone(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
636 self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
637 self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
638 self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
639 self.assertIsNotNone(re.match(br"[\u]", b'u'))
640 self.assertIsNotNone(re.match(br"[\U]", b'U'))
641 self.assertRaises(re.error, re.match, br"[\911]", "")
642 self.assertRaises(re.error, re.match, br"[\x1z]", "")
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000643
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000644 def test_bug_113254(self):
645 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
646 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
647 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
648
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000649 def test_bug_527371(self):
650 # bug described in patches 527371/672491
651 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
652 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
653 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
654 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
655 self.assertEqual(re.match("((a))", "a").lastindex, 1)
656
657 def test_bug_545855(self):
658 # bug 545855 -- This pattern failed to cause a compile error as it
659 # should, instead provoking a TypeError.
660 self.assertRaises(re.error, re.compile, 'foo[a-')
661
662 def test_bug_418626(self):
663 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
664 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
665 # pattern '*?' on a long string.
666 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
667 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
668 20003)
669 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000670 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000671 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000672 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000673
674 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000675 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000676 self.assertEqual(re.compile(pat) and 1, 1)
677
Skip Montanaro1e703c62003-04-25 15:40:28 +0000678 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000679 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000680 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000681 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
682 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
683 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000684
Serhiy Storchakafa468162013-02-16 21:23:53 +0200685 def test_unlimited_zero_width_repeat(self):
686 # Issue #9669
687 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
688 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
689 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
690 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
691 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
692 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
693
Skip Montanaro1e703c62003-04-25 15:40:28 +0000694 def test_scanner(self):
695 def s_ident(scanner, token): return token
696 def s_operator(scanner, token): return "op%s" % token
697 def s_float(scanner, token): return float(token)
698 def s_int(scanner, token): return int(token)
699
700 scanner = Scanner([
701 (r"[a-zA-Z_]\w*", s_ident),
702 (r"\d+\.\d*", s_float),
703 (r"\d+", s_int),
704 (r"=|\+|-|\*|/", s_operator),
705 (r"\s+", None),
706 ])
707
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000708 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
709
Skip Montanaro1e703c62003-04-25 15:40:28 +0000710 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
711 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
712 'op+', 'bar'], ''))
713
Skip Montanaro5ba00542003-04-25 16:00:14 +0000714 def test_bug_448951(self):
715 # bug 448951 (similar to 429357, but with single char match)
716 # (Also test greedy matches.)
717 for op in '','?','*':
718 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
719 (None, None))
720 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
721 ('a:', 'a'))
722
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000723 def test_bug_725106(self):
724 # capturing groups in alternatives in repeats
725 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
726 ('b', 'a'))
727 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
728 ('c', 'b'))
729 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
730 ('b', None))
731 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
732 ('b', None))
733 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
734 ('b', 'a'))
735 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
736 ('c', 'b'))
737 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
738 ('b', None))
739 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
740 ('b', None))
741
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000742 def test_bug_725149(self):
743 # mark_stack_base restoring before restoring marks
744 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
745 ('a', None))
746 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
747 ('a', None, None))
748
Just van Rossum12723ba2003-07-02 20:03:04 +0000749 def test_bug_764548(self):
750 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000751 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000752 pat = re.compile(my_unicode("abc"))
753 self.assertEqual(pat.match("xyz"), None)
754
Skip Montanaro5ba00542003-04-25 16:00:14 +0000755 def test_finditer(self):
756 iter = re.finditer(r":+", "a:b::c:::d")
757 self.assertEqual([item.group(0) for item in iter],
758 [":", "::", ":::"])
759
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600760 pat = re.compile(r":+")
761 iter = pat.finditer("a:b::c:::d", 1, 10)
762 self.assertEqual([item.group(0) for item in iter],
763 [":", "::", ":::"])
764
765 pat = re.compile(r":+")
766 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
767 self.assertEqual([item.group(0) for item in iter],
768 [":", "::", ":::"])
769
770 pat = re.compile(r":+")
771 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
772 self.assertEqual([item.group(0) for item in iter],
773 [":", "::", ":::"])
774
775 pat = re.compile(r":+")
776 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
777 self.assertEqual([item.group(0) for item in iter],
778 ["::", "::"])
779
Thomas Wouters40a088d2008-03-18 20:19:54 +0000780 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000781 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000782 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000783
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000784 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000785 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000786 self.assertEqual(re.compile(pattern).split("a.b.c"),
787 ['a','b','c'])
788
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000789 def test_bug_581080(self):
790 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000791 self.assertEqual(next(iter).span(), (1,2))
792 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000793
794 scanner = re.compile(r"\s").scanner("a b")
795 self.assertEqual(scanner.search().span(), (1, 2))
796 self.assertEqual(scanner.search(), None)
797
798 def test_bug_817234(self):
799 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000800 self.assertEqual(next(iter).span(), (0, 4))
801 self.assertEqual(next(iter).span(), (4, 4))
802 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000803
Mark Dickinson1f268282009-07-28 17:22:36 +0000804 def test_bug_6561(self):
805 # '\d' should match characters in Unicode category 'Nd'
806 # (Number, Decimal Digit), but not those in 'Nl' (Number,
807 # Letter) or 'No' (Number, Other).
808 decimal_digits = [
809 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
810 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
811 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
812 ]
813 for x in decimal_digits:
814 self.assertEqual(re.match('^\d$', x).group(0), x)
815
816 not_decimal_digits = [
817 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
818 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
819 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
820 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
821 ]
822 for x in not_decimal_digits:
823 self.assertIsNone(re.match('^\d$', x))
824
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825 def test_empty_array(self):
826 # SF buf 1647541
827 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000828 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000829 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000830 self.assertEqual(re.compile(b"bla").match(a), None)
831 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000832
Christian Heimes072c0f12008-01-03 23:01:04 +0000833 def test_inline_flags(self):
834 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000835 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
836 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000837
838 p = re.compile(upper_char, re.I | re.U)
839 q = p.match(lower_char)
840 self.assertNotEqual(q, None)
841
842 p = re.compile(lower_char, re.I | re.U)
843 q = p.match(upper_char)
844 self.assertNotEqual(q, None)
845
846 p = re.compile('(?i)' + upper_char, re.U)
847 q = p.match(lower_char)
848 self.assertNotEqual(q, None)
849
850 p = re.compile('(?i)' + lower_char, re.U)
851 q = p.match(upper_char)
852 self.assertNotEqual(q, None)
853
854 p = re.compile('(?iu)' + upper_char)
855 q = p.match(lower_char)
856 self.assertNotEqual(q, None)
857
858 p = re.compile('(?iu)' + lower_char)
859 q = p.match(upper_char)
860 self.assertNotEqual(q, None)
861
Christian Heimes25bb7832008-01-11 16:17:00 +0000862 def test_dollar_matches_twice(self):
863 "$ matches the end of string, and just before the terminating \n"
864 pattern = re.compile('$')
865 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
866 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
867 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
868
869 pattern = re.compile('$', re.MULTILINE)
870 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
871 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
872 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
873
Antoine Pitroufd036452008-08-19 17:56:33 +0000874 def test_bytes_str_mixing(self):
875 # Mixing str and bytes is disallowed
876 pat = re.compile('.')
877 bpat = re.compile(b'.')
878 self.assertRaises(TypeError, pat.match, b'b')
879 self.assertRaises(TypeError, bpat.match, 'b')
880 self.assertRaises(TypeError, pat.sub, b'b', 'c')
881 self.assertRaises(TypeError, pat.sub, 'b', b'c')
882 self.assertRaises(TypeError, pat.sub, b'b', b'c')
883 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
884 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
885 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
886
887 def test_ascii_and_unicode_flag(self):
888 # String patterns
889 for flags in (0, re.UNICODE):
890 pat = re.compile('\xc0', flags | re.IGNORECASE)
891 self.assertNotEqual(pat.match('\xe0'), None)
892 pat = re.compile('\w', flags)
893 self.assertNotEqual(pat.match('\xe0'), None)
894 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
895 self.assertEqual(pat.match('\xe0'), None)
896 pat = re.compile('(?a)\xc0', re.IGNORECASE)
897 self.assertEqual(pat.match('\xe0'), None)
898 pat = re.compile('\w', re.ASCII)
899 self.assertEqual(pat.match('\xe0'), None)
900 pat = re.compile('(?a)\w')
901 self.assertEqual(pat.match('\xe0'), None)
902 # Bytes patterns
903 for flags in (0, re.ASCII):
904 pat = re.compile(b'\xc0', re.IGNORECASE)
905 self.assertEqual(pat.match(b'\xe0'), None)
906 pat = re.compile(b'\w')
907 self.assertEqual(pat.match(b'\xe0'), None)
908 # Incompatibilities
909 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
910 self.assertRaises(ValueError, re.compile, b'(?u)\w')
911 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
912 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
913 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
914 self.assertRaises(ValueError, re.compile, '(?au)\w')
915
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000916 def test_bug_6509(self):
917 # Replacement strings of both types must parse properly.
918 # all strings
919 pat = re.compile('a(\w)')
920 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
921 pat = re.compile('a(.)')
922 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
923 pat = re.compile('..')
924 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
925
926 # all bytes
927 pat = re.compile(b'a(\w)')
928 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
929 pat = re.compile(b'a(.)')
930 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
931 pat = re.compile(b'..')
932 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
933
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000934 def test_dealloc(self):
935 # issue 3299: check for segfault in debug build
936 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +0000937 # the overflow limit is different on wide and narrow builds and it
938 # depends on the definition of SRE_CODE (see sre.h).
939 # 2**128 should be big enough to overflow on both. For smaller values
940 # a RuntimeError is raised instead of OverflowError.
941 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000942 self.assertRaises(TypeError, re.finditer, "a", {})
943 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Victor Stinner5abeafb2010-03-04 21:59:53 +0000944 self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000945
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200946 def test_search_dot_unicode(self):
947 self.assertIsNotNone(re.search("123.*-", '123abc-'))
948 self.assertIsNotNone(re.search("123.*-", '123\xe9-'))
949 self.assertIsNotNone(re.search("123.*-", '123\u20ac-'))
950 self.assertIsNotNone(re.search("123.*-", '123\U0010ffff-'))
951 self.assertIsNotNone(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
952
Ezio Melottidf723e12012-03-13 01:29:48 +0200953 def test_compile(self):
954 # Test return value when given string and pattern as parameter
955 pattern = re.compile('random pattern')
956 self.assertIsInstance(pattern, re._pattern_type)
957 same_pattern = re.compile(pattern)
958 self.assertIsInstance(same_pattern, re._pattern_type)
959 self.assertIs(same_pattern, pattern)
960 # Test behaviour when not given a string or pattern as parameter
961 self.assertRaises(TypeError, re.compile, 0)
962
Ezio Melottife8e6e72013-01-11 08:32:01 +0200963 def test_bug_13899(self):
964 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
965 # nothing. Ditto B and Z.
966 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
967 ['A', 'B', '\b', 'C', 'Z'])
968
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    """Match positions beyond 2**31 are reported exactly.

    Issue #10182: indices were 32-bit-truncated.
    """
    haystack = 'a' * size
    found = re.search('$', haystack)
    self.assertIsNotNone(found)
    self.assertEqual(found.start(), size)
    self.assertEqual(found.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100977
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    """subn() counts and rebuilds correctly on a very large string.

    Issue #10182: indices were 32-bit-truncated.
    """
    haystack = 'a' * size
    replaced, count = re.subn('', '', haystack)
    self.assertEqual(replaced, haystack)
    self.assertEqual(count, size + 1)
987
Serhiy Storchakac1b59d42012-12-29 23:38:48 +0200988 def test_bug_16688(self):
989 # Issue 16688: Backreferences make case-insensitive regex fail on
990 # non-ASCII strings.
991 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
992 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100993
Serhiy Storchaka70ca0212013-02-16 16:47:47 +0200994 def test_repeat_minmax_overflow(self):
995 # Issue #13169
996 string = "x" * 100000
997 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
998 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
999 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1000 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1001 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1002 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1003 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1004 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1005 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1006 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1007 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1008
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    """Counts just below _sre.MAXREPEAT compile; MAXREPEAT itself overflows.

    The test is skipped when _sre does not export MAXREPEAT.
    """
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    string = "x" * 100000
    largest_ok = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % largest_ok, string))
    bounded = re.match(r".{,%d}" % largest_ok, string)
    self.assertEqual(bounded.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest_ok, string))
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
1023
1024
def _group_as_text(match, key):
    """Return match.group(key), or "None"/"Error" for unset/unknown groups."""
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    # Special hack because else the string concat fails:
    return "None" if value is None else value


def run_re_tests():
    """Run the table-driven cases from ``test.re_tests`` and print problems.

    Each entry is ``(pattern, string, outcome)`` or
    ``(pattern, string, outcome, repl, expected)``.  The pattern is compiled
    and searched for in the string, and the result is compared with the
    expected outcome.  Cases expected to succeed are re-run as bytes, limited
    to the matched range, and with the IGNORECASE, LOCALE and UNICODE flags.
    Mismatches are reported by printing ``===`` / ``***`` lines to stdout;
    nothing is returned.

    Raises:
        ValueError: if a test tuple has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception subclasses,
            # so they propagate with their original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no result for this case; skip the checks below rather
            # than reusing a stale (or unbound) ``result``.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_as_text(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_as_text(result, name)
            # NOTE: eval() is applied to expressions taken from the
            # test.re_tests table imported above, not to external input.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                bytes_result = bpat.search(bs)
                if bytes_result is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                and result is not None:
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001151
Gregory P. Smith5a631832010-07-27 05:31:29 +00001152
def test_main():
    """Run the ReTests unittest suite, then the table-driven re_tests cases."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001156
# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_main()