blob: 2104437408df89fb46fdb7769bf80c65df85dc44 [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
2 cpython_only
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00005from re import Scanner
R David Murray26dfaac92013-04-14 13:00:54 -04006import sre_constants
Ezio Melottid2114eb2011-03-25 14:08:44 +02007import sys
8import string
9import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +000010from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000011
# Misc tests from Tim Peters' re.doc
13
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
17
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000019
Skip Montanaro8ed06da2003-04-24 19:43:18 +000020class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000021
Benjamin Petersone48944b2012-03-07 14:50:25 -060022 def test_keep_buffer(self):
23 # See bug 14212
24 b = bytearray(b'x')
25 it = re.finditer(b'a', b)
26 with self.assertRaises(BufferError):
27 b.extend(b'x'*400)
28 list(it)
29 del it
30 gc_collect()
31 b.extend(b'x'*400)
32
Raymond Hettinger027bb632004-05-31 03:09:25 +000033 def test_weakref(self):
34 s = 'QabbbcR'
35 x = re.compile('ab+c')
36 y = proxy(x)
37 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
38
Skip Montanaro8ed06da2003-04-24 19:43:18 +000039 def test_search_star_plus(self):
40 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
41 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
42 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
43 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000044 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000045 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
46 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
47 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
48 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000049 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000050
def bump_num(self, matchobj):
    """Return the integer matched by *matchobj*, plus one, as a string."""
    return str(1 + int(matchobj.group(0)))
Guido van Rossum23b22571997-07-17 22:36:14 +000054
Skip Montanaro8ed06da2003-04-24 19:43:18 +000055 def test_basic_re_sub(self):
56 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
57 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
58 '9.3 -3 24x100y')
59 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
60 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
63 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000064
Skip Montanaro8ed06da2003-04-24 19:43:18 +000065 s = r"\1\1"
66 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
67 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
68 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000069
Skip Montanaro8ed06da2003-04-24 19:43:18 +000070 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
71 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
72 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
73 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000074
Skip Montanaro8ed06da2003-04-24 19:43:18 +000075 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
76 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
77 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
78 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
79 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000080
Skip Montanaro8ed06da2003-04-24 19:43:18 +000081 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000082
Skip Montanaro2726fcd2003-04-25 14:31:54 +000083 def test_bug_449964(self):
84 # fails for group followed by other escape
85 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
86 'xx\bxx\b')
87
88 def test_bug_449000(self):
89 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000090 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
91 'abc\ndef\n')
92 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
93 'abc\ndef\n')
94 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
95 'abc\ndef\n')
96 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
97 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000098
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000099 def test_bug_1661(self):
100 # Verify that flags do not get silently ignored with compiled patterns
101 pattern = re.compile('.')
102 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
103 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
104 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
105 self.assertRaises(ValueError, re.compile, pattern, re.I)
106
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000107 def test_bug_3629(self):
108 # A regex that triggered a bug in the sre-code validator
109 re.compile("(?P<quote>)(?(quote))")
110
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000111 def test_sub_template_numeric_escape(self):
112 # bug 776311 and friends
113 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
114 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
115 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
116 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
117 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
118 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
119 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
120
121 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
122 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
123
124 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
125 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
126 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
127 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
128 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
129
130 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
131 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000132
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000133 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
134 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
135 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
136 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
137 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
138 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
139 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
140 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
141 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
142 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
143 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
144 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
145
146 # in python2.3 (etc), these loop endlessly in sre_parser.py
147 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
148 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
149 'xz8')
150 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
151 'xza')
152
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000153 def test_qualified_re_sub(self):
154 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
155 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000156
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000157 def test_bug_114660(self):
158 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
159 'hello there')
160
161 def test_bug_462270(self):
162 # Test for empty sub() behaviour, see SF bug #462270
163 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
164 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
165
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200166 def test_symbolic_groups(self):
167 re.compile('(?P<a>x)(?P=a)(?(a)y)')
168 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
169 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
170 self.assertRaises(re.error, re.compile, '(?Px)')
171 self.assertRaises(re.error, re.compile, '(?P=)')
172 self.assertRaises(re.error, re.compile, '(?P=1)')
173 self.assertRaises(re.error, re.compile, '(?P=a)')
174 self.assertRaises(re.error, re.compile, '(?P=a1)')
175 self.assertRaises(re.error, re.compile, '(?P=a.)')
176 self.assertRaises(re.error, re.compile, '(?P<)')
177 self.assertRaises(re.error, re.compile, '(?P<>)')
178 self.assertRaises(re.error, re.compile, '(?P<1>)')
179 self.assertRaises(re.error, re.compile, '(?P<a.>)')
180 self.assertRaises(re.error, re.compile, '(?())')
181 self.assertRaises(re.error, re.compile, '(?(a))')
182 self.assertRaises(re.error, re.compile, '(?(1a))')
183 self.assertRaises(re.error, re.compile, '(?(a.))')
Georg Brandl1d472b72013-04-14 11:40:00 +0200184 # New valid/invalid identifiers in Python 3
185 re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
186 re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
187 self.assertRaises(re.error, re.compile, '(?P<©>x)')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200188
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000189 def test_symbolic_refs(self):
190 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
191 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
192 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
193 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200194 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
196 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
197 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
198 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000199 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Georg Brandl1d472b72013-04-14 11:40:00 +0200200 # New valid/invalid identifiers in Python 3
201 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
202 self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
203 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000204
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000205 def test_re_subn(self):
206 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
207 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
208 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
209 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
210 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000211
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000212 def test_re_split(self):
213 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
214 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
215 self.assertEqual(re.split("(:*)", ":a:b::c"),
216 ['', ':', 'a', ':', 'b', '::', 'c'])
217 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
218 self.assertEqual(re.split("(:)*", ":a:b::c"),
219 ['', ':', 'a', ':', 'b', ':', 'c'])
220 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
221 ['', ':', 'a', ':b::', 'c'])
222 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
223 ['', None, ':', 'a', None, ':', '', 'b', None, '',
224 None, '::', 'c'])
225 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
226 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000227
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000228 def test_qualified_re_split(self):
229 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
230 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
231 self.assertEqual(re.split("(:)", ":a:b::c", 2),
232 ['', ':', 'a', ':', 'b::c'])
233 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
234 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000235
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000236 def test_re_findall(self):
237 self.assertEqual(re.findall(":+", "abc"), [])
238 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
239 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
240 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
241 (":", ":"),
242 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000243
Skip Montanaro5ba00542003-04-25 16:00:14 +0000244 def test_bug_117612(self):
245 self.assertEqual(re.findall(r"(a|(b))", "aba"),
246 [("a", ""),("b", "b"),("a", "")])
247
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000248 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000249 self.assertEqual(re.match('a', 'a').groups(), ())
250 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
251 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
252 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
253 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000254
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000255 pat = re.compile('((a)|(b))(c)?')
256 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
257 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
258 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
259 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
260 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000261
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000262 # A single group
263 m = re.match('(a)', 'a')
264 self.assertEqual(m.group(0), 'a')
265 self.assertEqual(m.group(0), 'a')
266 self.assertEqual(m.group(1), 'a')
267 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000268
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000269 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
270 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
271 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
272 (None, 'b', None))
273 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000274
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000275 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000276 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
277 ('(', 'a'))
278 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
279 (None, 'a'))
280 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
281 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
282 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
283 ('a', 'b'))
284 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
285 (None, 'd'))
286 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
287 (None, 'd'))
288 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
289 ('a', ''))
290
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000291 # Tests for bug #1177831: exercise groups other than the first group
292 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
293 self.assertEqual(p.match('abc').groups(),
294 ('a', 'b', 'c'))
295 self.assertEqual(p.match('ad').groups(),
296 ('a', None, 'd'))
297 self.assertEqual(p.match('abd'), None)
298 self.assertEqual(p.match('ac'), None)
299
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000300
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000301 def test_re_groupref(self):
302 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
303 ('|', 'a'))
304 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
305 (None, 'a'))
306 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
307 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
308 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
309 ('a', 'a'))
310 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
311 (None, None))
312
313 def test_groupdict(self):
314 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
315 'first second').groupdict(),
316 {'first':'first', 'second':'second'})
317
318 def test_expand(self):
319 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
320 "first second")
321 .expand(r"\2 \1 \g<second> \g<first>"),
322 "second first second first")
323
324 def test_repeat_minmax(self):
325 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
326 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
327 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
328 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
329
330 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
331 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
332 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
333 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
334 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
335 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
336 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
338
339 self.assertEqual(re.match("^x{1}$", "xxx"), None)
340 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
341 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
342 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
343
344 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
345 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
346 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
347 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
348 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
349 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
350 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
352
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000353 self.assertEqual(re.match("^x{}$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
355
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000356 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000357 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000358 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000359 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
360 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
361 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
362 {'first': 1, 'other': 2})
363
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000364 self.assertEqual(re.match("(a)", "a").pos, 0)
365 self.assertEqual(re.match("(a)", "a").endpos, 1)
366 self.assertEqual(re.match("(a)", "a").string, "a")
367 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
368 self.assertNotEqual(re.match("(a)", "a").re, None)
369
370 def test_special_escapes(self):
371 self.assertEqual(re.search(r"\b(b.)\b",
372 "abcd abc bcd bx").group(1), "bx")
373 self.assertEqual(re.search(r"\B(b.)\B",
374 "abc bcd bc abxd").group(1), "bx")
375 self.assertEqual(re.search(r"\b(b.)\b",
376 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
377 self.assertEqual(re.search(r"\B(b.)\B",
378 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
379 self.assertEqual(re.search(r"\b(b.)\b",
380 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
381 self.assertEqual(re.search(r"\B(b.)\B",
382 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
383 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
384 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
385 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
386 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000387 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000388 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000389 "abc bcd bc abxd").group(1), "bx")
390 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
391 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
392 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000393 self.assertEqual(re.search(r"\d\D\w\W\s\S",
394 "1aa! a").group(0), "1aa! a")
395 self.assertEqual(re.search(r"\d\D\w\W\s\S",
396 "1aa! a", re.LOCALE).group(0), "1aa! a")
397 self.assertEqual(re.search(r"\d\D\w\W\s\S",
398 "1aa! a", re.UNICODE).group(0), "1aa! a")
399
Ezio Melotti5a045b92012-02-29 11:48:44 +0200400 def test_string_boundaries(self):
401 # See http://bugs.python.org/issue10713
402 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
403 "abc")
404 # There's a word boundary at the start of a string.
405 self.assertTrue(re.match(r"\b", "abc"))
406 # A non-empty string includes a non-boundary zero-length match.
407 self.assertTrue(re.search(r"\B", "abc"))
408 # There is no non-boundary match at the start of a string.
409 self.assertFalse(re.match(r"\B", "abc"))
410 # However, an empty string contains no word boundaries, and also no
411 # non-boundaries.
412 self.assertEqual(re.search(r"\B", ""), None)
413 # This one is questionable and different from the perlre behaviour,
414 # but describes current behavior.
415 self.assertEqual(re.search(r"\b", ""), None)
416 # A single word-character string has two boundaries, but no
417 # non-boundary gaps.
418 self.assertEqual(len(re.findall(r"\b", "a")), 2)
419 self.assertEqual(len(re.findall(r"\B", "a")), 0)
420 # If there are no words, there are no boundaries
421 self.assertEqual(len(re.findall(r"\b", " ")), 0)
422 self.assertEqual(len(re.findall(r"\b", " ")), 0)
423 # Can match around the whitespace.
424 self.assertEqual(len(re.findall(r"\B", " ")), 2)
425
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000426 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000427 self.assertEqual(re.match("([\u2222\u2223])",
428 "\u2222").group(1), "\u2222")
429 self.assertEqual(re.match("([\u2222\u2223])",
430 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000431
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100432 def test_big_codesize(self):
433 # Issue #1160
434 r = re.compile('|'.join(('%d'%x for x in range(10000))))
435 self.assertIsNotNone(r.match('1000'))
436 self.assertIsNotNone(r.match('9999'))
437
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000438 def test_anyall(self):
439 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
440 "a\nb")
441 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
442 "a\n\nb")
443
444 def test_non_consuming(self):
445 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
446 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
447 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
448 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
449 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
450 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
451 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
452
453 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
454 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
455 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
456 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
457
458 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000459 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
460 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000461 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
462 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
463 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
464 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
465 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
466 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
467 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
468 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
469
470 def test_category(self):
471 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
472
473 def test_getlower(self):
474 import _sre
475 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
476 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
477 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
478
479 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000480 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000481
482 def test_not_literal(self):
483 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
484 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
485
486 def test_search_coverage(self):
487 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
488 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
489
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    """Assert that *matcher* finds *pattern* in *text* at the expected place.

    If neither *match* nor *span* is given, the pattern is expected to
    match the whole of *text*.  They must be given together; supplying
    only one of them raises ValueError.
    """
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000503
Ezio Melottid2114eb2011-03-25 14:08:44 +0200504 def test_re_escape(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300505 alnum_chars = string.ascii_letters + string.digits + '_'
Ezio Melottid2114eb2011-03-25 14:08:44 +0200506 p = ''.join(chr(i) for i in range(256))
507 for c in p:
508 if c in alnum_chars:
509 self.assertEqual(re.escape(c), c)
510 elif c == '\x00':
511 self.assertEqual(re.escape(c), '\\000')
512 else:
513 self.assertEqual(re.escape(c), '\\' + c)
514 self.assertMatch(re.escape(c), c)
515 self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000516
Guido van Rossum698280d2008-09-10 17:44:35 +0000517 def test_re_escape_byte(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300518 alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
Ezio Melottid2114eb2011-03-25 14:08:44 +0200519 p = bytes(range(256))
520 for i in p:
Guido van Rossum698280d2008-09-10 17:44:35 +0000521 b = bytes([i])
Ezio Melottid2114eb2011-03-25 14:08:44 +0200522 if b in alnum_chars:
523 self.assertEqual(re.escape(b), b)
524 elif i == 0:
525 self.assertEqual(re.escape(b), b'\\000')
526 else:
527 self.assertEqual(re.escape(b), b'\\' + b)
528 self.assertMatch(re.escape(b), b)
529 self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000530
Ezio Melotti7b9e97b2011-03-25 14:09:33 +0200531 def test_re_escape_non_ascii(self):
532 s = 'xxx\u2620\u2620\u2620xxx'
533 s_escaped = re.escape(s)
534 self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
535 self.assertMatch(s_escaped, s)
536 self.assertMatch('.%s+.' % re.escape('\u2620'), s,
537 'x\u2620\u2620\u2620x', (2, 7), re.search)
538
539 def test_re_escape_non_ascii_bytes(self):
540 b = 'y\u2620y\u2620y'.encode('utf-8')
541 b_escaped = re.escape(b)
542 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
543 self.assertMatch(b_escaped, b)
544 res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
545 self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000546
def pickle_test(self, pickle):
    """Round-trip a compiled pattern through *pickle* and compare.

    *pickle* is any object providing dumps() and loads().
    """
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000552
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000553 def test_constants(self):
554 self.assertEqual(re.I, re.IGNORECASE)
555 self.assertEqual(re.L, re.LOCALE)
556 self.assertEqual(re.M, re.MULTILINE)
557 self.assertEqual(re.S, re.DOTALL)
558 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000559
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000560 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000561 for flag in [re.I, re.M, re.X, re.S, re.L]:
562 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000563
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000564 def test_sre_character_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200565 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
566 if i < 256:
567 self.assertIsNotNone(re.match(r"\%03o" % i, chr(i)))
568 self.assertIsNotNone(re.match(r"\%03o0" % i, chr(i)+"0"))
569 self.assertIsNotNone(re.match(r"\%03o8" % i, chr(i)+"8"))
570 self.assertIsNotNone(re.match(r"\x%02x" % i, chr(i)))
571 self.assertIsNotNone(re.match(r"\x%02x0" % i, chr(i)+"0"))
572 self.assertIsNotNone(re.match(r"\x%02xz" % i, chr(i)+"z"))
573 if i < 0x10000:
574 self.assertIsNotNone(re.match(r"\u%04x" % i, chr(i)))
575 self.assertIsNotNone(re.match(r"\u%04x0" % i, chr(i)+"0"))
576 self.assertIsNotNone(re.match(r"\u%04xz" % i, chr(i)+"z"))
577 self.assertIsNotNone(re.match(r"\U%08x" % i, chr(i)))
578 self.assertIsNotNone(re.match(r"\U%08x0" % i, chr(i)+"0"))
579 self.assertIsNotNone(re.match(r"\U%08xz" % i, chr(i)+"z"))
580 self.assertIsNotNone(re.match(r"\0", "\000"))
581 self.assertIsNotNone(re.match(r"\08", "\0008"))
582 self.assertIsNotNone(re.match(r"\01", "\001"))
583 self.assertIsNotNone(re.match(r"\018", "\0018"))
584 self.assertIsNotNone(re.match(r"\567", chr(0o167)))
585 self.assertRaises(re.error, re.match, r"\911", "")
586 self.assertRaises(re.error, re.match, r"\x1", "")
587 self.assertRaises(re.error, re.match, r"\x1z", "")
588 self.assertRaises(re.error, re.match, r"\u123", "")
589 self.assertRaises(re.error, re.match, r"\u123z", "")
590 self.assertRaises(re.error, re.match, r"\U0001234", "")
591 self.assertRaises(re.error, re.match, r"\U0001234z", "")
592 self.assertRaises(re.error, re.match, r"\U00110000", "")
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000593
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000594 def test_sre_character_class_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200595 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
596 if i < 256:
597 self.assertIsNotNone(re.match(r"[\%o]" % i, chr(i)))
598 self.assertIsNotNone(re.match(r"[\%o8]" % i, chr(i)))
599 self.assertIsNotNone(re.match(r"[\%03o]" % i, chr(i)))
600 self.assertIsNotNone(re.match(r"[\%03o0]" % i, chr(i)))
601 self.assertIsNotNone(re.match(r"[\%03o8]" % i, chr(i)))
602 self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
603 self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
604 self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
605 if i < 0x10000:
606 self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
607 self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
608 self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
609 self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
610 self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
611 self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
Ezio Melottieadece22013-02-23 08:40:07 +0200612 self.assertIsNotNone(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200613 self.assertRaises(re.error, re.match, r"[\911]", "")
614 self.assertRaises(re.error, re.match, r"[\x1z]", "")
615 self.assertRaises(re.error, re.match, r"[\u123z]", "")
616 self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
617 self.assertRaises(re.error, re.match, r"[\U00110000]", "")
618
619 def test_sre_byte_literals(self):
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000620 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Antoine Pitrou463badf2012-06-23 13:29:19 +0200621 self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
622 self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
623 self.assertIsNotNone(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
624 self.assertIsNotNone(re.match((r"\x%02x" % i).encode(), bytes([i])))
625 self.assertIsNotNone(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
626 self.assertIsNotNone(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
627 self.assertIsNotNone(re.match(br"\u", b'u'))
628 self.assertIsNotNone(re.match(br"\U", b'U'))
629 self.assertIsNotNone(re.match(br"\0", b"\000"))
630 self.assertIsNotNone(re.match(br"\08", b"\0008"))
631 self.assertIsNotNone(re.match(br"\01", b"\001"))
632 self.assertIsNotNone(re.match(br"\018", b"\0018"))
633 self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
634 self.assertRaises(re.error, re.match, br"\911", b"")
635 self.assertRaises(re.error, re.match, br"\x1", b"")
636 self.assertRaises(re.error, re.match, br"\x1z", b"")
637
638 def test_sre_byte_class_literals(self):
639 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
640 self.assertIsNotNone(re.match((r"[\%o]" % i).encode(), bytes([i])))
641 self.assertIsNotNone(re.match((r"[\%o8]" % i).encode(), bytes([i])))
642 self.assertIsNotNone(re.match((r"[\%03o]" % i).encode(), bytes([i])))
643 self.assertIsNotNone(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
644 self.assertIsNotNone(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
645 self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
646 self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
647 self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
648 self.assertIsNotNone(re.match(br"[\u]", b'u'))
649 self.assertIsNotNone(re.match(br"[\U]", b'U'))
650 self.assertRaises(re.error, re.match, br"[\911]", "")
651 self.assertRaises(re.error, re.match, br"[\x1z]", "")
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000652
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000653 def test_bug_113254(self):
654 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
655 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
656 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
657
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000658 def test_bug_527371(self):
659 # bug described in patches 527371/672491
660 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
661 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
662 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
663 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
664 self.assertEqual(re.match("((a))", "a").lastindex, 1)
665
666 def test_bug_545855(self):
667 # bug 545855 -- This pattern failed to cause a compile error as it
668 # should, instead provoking a TypeError.
669 self.assertRaises(re.error, re.compile, 'foo[a-')
670
671 def test_bug_418626(self):
672 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
673 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
674 # pattern '*?' on a long string.
675 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
676 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
677 20003)
678 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000679 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000680 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000681 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000682
683 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000684 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000685 self.assertEqual(re.compile(pat) and 1, 1)
686
Skip Montanaro1e703c62003-04-25 15:40:28 +0000687 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000688 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000689 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000690 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
691 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
692 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000693
Serhiy Storchakafa468162013-02-16 21:23:53 +0200694 def test_unlimited_zero_width_repeat(self):
695 # Issue #9669
696 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
697 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
698 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
699 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
700 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
701 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
702
Skip Montanaro1e703c62003-04-25 15:40:28 +0000703 def test_scanner(self):
704 def s_ident(scanner, token): return token
705 def s_operator(scanner, token): return "op%s" % token
706 def s_float(scanner, token): return float(token)
707 def s_int(scanner, token): return int(token)
708
709 scanner = Scanner([
710 (r"[a-zA-Z_]\w*", s_ident),
711 (r"\d+\.\d*", s_float),
712 (r"\d+", s_int),
713 (r"=|\+|-|\*|/", s_operator),
714 (r"\s+", None),
715 ])
716
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000717 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
718
Skip Montanaro1e703c62003-04-25 15:40:28 +0000719 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
720 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
721 'op+', 'bar'], ''))
722
Skip Montanaro5ba00542003-04-25 16:00:14 +0000723 def test_bug_448951(self):
724 # bug 448951 (similar to 429357, but with single char match)
725 # (Also test greedy matches.)
726 for op in '','?','*':
727 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
728 (None, None))
729 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
730 ('a:', 'a'))
731
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000732 def test_bug_725106(self):
733 # capturing groups in alternatives in repeats
734 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
735 ('b', 'a'))
736 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
737 ('c', 'b'))
738 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
739 ('b', None))
740 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
741 ('b', None))
742 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
743 ('b', 'a'))
744 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
745 ('c', 'b'))
746 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
747 ('b', None))
748 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
749 ('b', None))
750
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000751 def test_bug_725149(self):
752 # mark_stack_base restoring before restoring marks
753 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
754 ('a', None))
755 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
756 ('a', None, None))
757
Just van Rossum12723ba2003-07-02 20:03:04 +0000758 def test_bug_764548(self):
759 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000760 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000761 pat = re.compile(my_unicode("abc"))
762 self.assertEqual(pat.match("xyz"), None)
763
Skip Montanaro5ba00542003-04-25 16:00:14 +0000764 def test_finditer(self):
765 iter = re.finditer(r":+", "a:b::c:::d")
766 self.assertEqual([item.group(0) for item in iter],
767 [":", "::", ":::"])
768
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600769 pat = re.compile(r":+")
770 iter = pat.finditer("a:b::c:::d", 1, 10)
771 self.assertEqual([item.group(0) for item in iter],
772 [":", "::", ":::"])
773
774 pat = re.compile(r":+")
775 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
776 self.assertEqual([item.group(0) for item in iter],
777 [":", "::", ":::"])
778
779 pat = re.compile(r":+")
780 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
781 self.assertEqual([item.group(0) for item in iter],
782 [":", "::", ":::"])
783
784 pat = re.compile(r":+")
785 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
786 self.assertEqual([item.group(0) for item in iter],
787 ["::", "::"])
788
Thomas Wouters40a088d2008-03-18 20:19:54 +0000789 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000790 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000791 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000792
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000793 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000794 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000795 self.assertEqual(re.compile(pattern).split("a.b.c"),
796 ['a','b','c'])
797
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000798 def test_bug_581080(self):
799 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000800 self.assertEqual(next(iter).span(), (1,2))
801 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000802
803 scanner = re.compile(r"\s").scanner("a b")
804 self.assertEqual(scanner.search().span(), (1, 2))
805 self.assertEqual(scanner.search(), None)
806
807 def test_bug_817234(self):
808 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000809 self.assertEqual(next(iter).span(), (0, 4))
810 self.assertEqual(next(iter).span(), (4, 4))
811 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000812
Mark Dickinson1f268282009-07-28 17:22:36 +0000813 def test_bug_6561(self):
814 # '\d' should match characters in Unicode category 'Nd'
815 # (Number, Decimal Digit), but not those in 'Nl' (Number,
816 # Letter) or 'No' (Number, Other).
817 decimal_digits = [
818 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
819 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
820 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
821 ]
822 for x in decimal_digits:
823 self.assertEqual(re.match('^\d$', x).group(0), x)
824
825 not_decimal_digits = [
826 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
827 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
828 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
829 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
830 ]
831 for x in not_decimal_digits:
832 self.assertIsNone(re.match('^\d$', x))
833
Guido van Rossumd8faa362007-04-27 19:54:29 +0000834 def test_empty_array(self):
835 # SF buf 1647541
836 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000837 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000838 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000839 self.assertEqual(re.compile(b"bla").match(a), None)
840 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000841
Christian Heimes072c0f12008-01-03 23:01:04 +0000842 def test_inline_flags(self):
843 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000844 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
845 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000846
847 p = re.compile(upper_char, re.I | re.U)
848 q = p.match(lower_char)
849 self.assertNotEqual(q, None)
850
851 p = re.compile(lower_char, re.I | re.U)
852 q = p.match(upper_char)
853 self.assertNotEqual(q, None)
854
855 p = re.compile('(?i)' + upper_char, re.U)
856 q = p.match(lower_char)
857 self.assertNotEqual(q, None)
858
859 p = re.compile('(?i)' + lower_char, re.U)
860 q = p.match(upper_char)
861 self.assertNotEqual(q, None)
862
863 p = re.compile('(?iu)' + upper_char)
864 q = p.match(lower_char)
865 self.assertNotEqual(q, None)
866
867 p = re.compile('(?iu)' + lower_char)
868 q = p.match(upper_char)
869 self.assertNotEqual(q, None)
870
Christian Heimes25bb7832008-01-11 16:17:00 +0000871 def test_dollar_matches_twice(self):
872 "$ matches the end of string, and just before the terminating \n"
873 pattern = re.compile('$')
874 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
875 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
876 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
877
878 pattern = re.compile('$', re.MULTILINE)
879 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
880 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
881 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
882
Antoine Pitroufd036452008-08-19 17:56:33 +0000883 def test_bytes_str_mixing(self):
884 # Mixing str and bytes is disallowed
885 pat = re.compile('.')
886 bpat = re.compile(b'.')
887 self.assertRaises(TypeError, pat.match, b'b')
888 self.assertRaises(TypeError, bpat.match, 'b')
889 self.assertRaises(TypeError, pat.sub, b'b', 'c')
890 self.assertRaises(TypeError, pat.sub, 'b', b'c')
891 self.assertRaises(TypeError, pat.sub, b'b', b'c')
892 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
893 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
894 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
895
896 def test_ascii_and_unicode_flag(self):
897 # String patterns
898 for flags in (0, re.UNICODE):
899 pat = re.compile('\xc0', flags | re.IGNORECASE)
900 self.assertNotEqual(pat.match('\xe0'), None)
901 pat = re.compile('\w', flags)
902 self.assertNotEqual(pat.match('\xe0'), None)
903 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
904 self.assertEqual(pat.match('\xe0'), None)
905 pat = re.compile('(?a)\xc0', re.IGNORECASE)
906 self.assertEqual(pat.match('\xe0'), None)
907 pat = re.compile('\w', re.ASCII)
908 self.assertEqual(pat.match('\xe0'), None)
909 pat = re.compile('(?a)\w')
910 self.assertEqual(pat.match('\xe0'), None)
911 # Bytes patterns
912 for flags in (0, re.ASCII):
913 pat = re.compile(b'\xc0', re.IGNORECASE)
914 self.assertEqual(pat.match(b'\xe0'), None)
915 pat = re.compile(b'\w')
916 self.assertEqual(pat.match(b'\xe0'), None)
917 # Incompatibilities
918 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
919 self.assertRaises(ValueError, re.compile, b'(?u)\w')
920 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
921 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
922 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
923 self.assertRaises(ValueError, re.compile, '(?au)\w')
924
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000925 def test_bug_6509(self):
926 # Replacement strings of both types must parse properly.
927 # all strings
928 pat = re.compile('a(\w)')
929 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
930 pat = re.compile('a(.)')
931 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
932 pat = re.compile('..')
933 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
934
935 # all bytes
936 pat = re.compile(b'a(\w)')
937 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
938 pat = re.compile(b'a(.)')
939 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
940 pat = re.compile(b'..')
941 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
942
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000943 def test_dealloc(self):
944 # issue 3299: check for segfault in debug build
945 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +0000946 # the overflow limit is different on wide and narrow builds and it
947 # depends on the definition of SRE_CODE (see sre.h).
948 # 2**128 should be big enough to overflow on both. For smaller values
949 # a RuntimeError is raised instead of OverflowError.
950 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000951 self.assertRaises(TypeError, re.finditer, "a", {})
952 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Victor Stinner5abeafb2010-03-04 21:59:53 +0000953 self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000954
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 def test_search_dot_unicode(self):
956 self.assertIsNotNone(re.search("123.*-", '123abc-'))
957 self.assertIsNotNone(re.search("123.*-", '123\xe9-'))
958 self.assertIsNotNone(re.search("123.*-", '123\u20ac-'))
959 self.assertIsNotNone(re.search("123.*-", '123\U0010ffff-'))
960 self.assertIsNotNone(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
961
Ezio Melottidf723e12012-03-13 01:29:48 +0200962 def test_compile(self):
963 # Test return value when given string and pattern as parameter
964 pattern = re.compile('random pattern')
965 self.assertIsInstance(pattern, re._pattern_type)
966 same_pattern = re.compile(pattern)
967 self.assertIsInstance(same_pattern, re._pattern_type)
968 self.assertIs(same_pattern, pattern)
969 # Test behaviour when not given a string or pattern as parameter
970 self.assertRaises(TypeError, re.compile, 0)
971
Ezio Melottife8e6e72013-01-11 08:32:01 +0200972 def test_bug_13899(self):
973 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
974 # nothing. Ditto B and Z.
975 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
976 ['A', 'B', '\b', 'C', 'Z'])
977
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # '$' can only match at the very end, so both offsets must be size.
    haystack = 'a' * size
    found = re.search('$', haystack)
    self.assertIsNotNone(found)
    for offset in (found.start(), found.end()):
        self.assertEqual(offset, size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100986
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # Replacing the empty pattern by the empty string leaves the text
    # unchanged; the expected count is size + 1.
    text = 'a' * size
    replaced, count = re.subn('', '', text)
    self.assertEqual(replaced, text)
    self.assertEqual(count, size + 1)
996
Serhiy Storchakac1b59d42012-12-29 23:38:48 +0200997 def test_bug_16688(self):
998 # Issue 16688: Backreferences make case-insensitive regex fail on
999 # non-ASCII strings.
1000 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1001 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001002
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001003 def test_repeat_minmax_overflow(self):
1004 # Issue #13169
1005 string = "x" * 100000
1006 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1007 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1008 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1009 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1010 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1011 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1012 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1013 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1014 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1015 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1016 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1017
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    string = "x" * 100000
    # One below MAXREPEAT is still a legal count: it compiles, and only
    # the upper-bounded form can match the 100000-character subject.
    almost = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % almost, string))
    bounded = re.match(r".{,%d}" % almost, string)
    self.assertEqual(bounded.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % almost, string))
    # MAXREPEAT itself must be refused when the pattern is compiled.
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
1032
R David Murray26dfaac92013-04-14 13:00:54 -04001033 def test_backref_group_name_in_exception(self):
1034 # Issue 17341: Poor error message when compiling invalid regex
1035 with self.assertRaisesRegex(sre_constants.error, '<foo>'):
1036 re.compile('(?P=<foo>)')
1037
1038 def test_group_name_in_exception(self):
1039 # Issue 17341: Poor error message when compiling invalid regex
1040 with self.assertRaisesRegex(sre_constants.error, '\?foo'):
1041 re.compile('(?P<?foo>)')
1042
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001043 def test_issue17998(self):
1044 for reps in '*', '+', '?', '{1}':
1045 for mod in '', '?':
1046 pattern = '.' + reps + mod + 'yz'
1047 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1048 ['xyz'], msg=pattern)
1049 pattern = pattern.encode()
1050 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1051 [b'xyz'], msg=pattern)
1052
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001053
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001054 def test_bug_2537(self):
1055 # issue 2537: empty submatches
1056 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1057 for inner_op in ('{0,}', '*', '?'):
1058 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1059 m = r.match("xyyzy")
1060 self.assertEqual(m.group(0), "xyy")
1061 self.assertEqual(m.group(1), "")
1062 self.assertEqual(m.group(2), "y")
1063
def run_re_tests():
    """Run the table-driven regex cases from test.re_tests.

    Every entry is either ``(pattern, string, outcome)`` or
    ``(pattern, string, outcome, repl, expected)``.  Mismatches between
    the observed and the expected outcome are reported with print(); a
    tuple of any other length raises ValueError.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception
            # subclasses, so they propagate without a special case.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # No result exists for this case; going on would test an
            # undefined name or the result of the previous case.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                try:
                    gi = result.group(i)
                except IndexError:
                    gi = "Error"
                else:
                    # Special hack because else the string concat fails:
                    if gi is None:
                        gi = "None"
                vardict['g%d' % i] = gi
            for name in result.re.groupindex:
                try:
                    gi = result.group(name)
                except IndexError:
                    gi = "Error"
                else:
                    if gi is None:
                        gi = "None"
                vardict[name] = gi
            # repl is an expression taken from the test.re_tests table
            # and is evaluated with the group values as its namespace.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bobj = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                if bobj.search(bs) is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001190
Gregory P. Smith5a631832010-07-27 05:31:29 +00001191
def test_main():
    """Run the ReTests unittest suite, then the table-driven regex cases."""
    # Order matters only for the output: unittest results come first.
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001195
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()