blob: f093812442623d791741276af20a55ff548d35d3 [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
2 cpython_only
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00005from re import Scanner
R David Murray26dfaac92013-04-14 13:00:54 -04006import sre_constants
Ezio Melottid2114eb2011-03-25 14:08:44 +02007import sys
8import string
9import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +000010from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000011
Guido van Rossum23b22571997-07-17 22:36:14 +000012# Misc tests from Tim Peters' re.doc
13
Just van Rossum6802c6e2003-07-02 14:36:59 +000014# WARNING: Don't change details in these tests if you don't know
Ezio Melotti42da6632011-03-15 05:18:48 +020015# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000016# cover most of the code.
17
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000019
Skip Montanaro8ed06da2003-04-24 19:43:18 +000020class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000021
Benjamin Petersone48944b2012-03-07 14:50:25 -060022 def test_keep_buffer(self):
23 # See bug 14212
24 b = bytearray(b'x')
25 it = re.finditer(b'a', b)
26 with self.assertRaises(BufferError):
27 b.extend(b'x'*400)
28 list(it)
29 del it
30 gc_collect()
31 b.extend(b'x'*400)
32
Raymond Hettinger027bb632004-05-31 03:09:25 +000033 def test_weakref(self):
34 s = 'QabbbcR'
35 x = re.compile('ab+c')
36 y = proxy(x)
37 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
38
Skip Montanaro8ed06da2003-04-24 19:43:18 +000039 def test_search_star_plus(self):
40 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
41 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
42 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
43 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000044 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000045 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
46 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
47 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
48 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000049 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000050
Skip Montanaro8ed06da2003-04-24 19:43:18 +000051 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000052 int_value = int(matchobj.group(0))
53 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000054
Skip Montanaro8ed06da2003-04-24 19:43:18 +000055 def test_basic_re_sub(self):
56 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
57 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
58 '9.3 -3 24x100y')
59 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
60 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
63 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000064
Skip Montanaro8ed06da2003-04-24 19:43:18 +000065 s = r"\1\1"
66 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
67 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
68 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000069
Skip Montanaro8ed06da2003-04-24 19:43:18 +000070 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
71 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
72 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
73 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000074
Skip Montanaro8ed06da2003-04-24 19:43:18 +000075 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
76 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
77 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
78 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
79 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000080
Skip Montanaro8ed06da2003-04-24 19:43:18 +000081 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000082
Skip Montanaro2726fcd2003-04-25 14:31:54 +000083 def test_bug_449964(self):
84 # fails for group followed by other escape
85 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
86 'xx\bxx\b')
87
88 def test_bug_449000(self):
89 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000090 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
91 'abc\ndef\n')
92 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
93 'abc\ndef\n')
94 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
95 'abc\ndef\n')
96 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
97 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000098
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000099 def test_bug_1661(self):
100 # Verify that flags do not get silently ignored with compiled patterns
101 pattern = re.compile('.')
102 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
103 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
104 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
105 self.assertRaises(ValueError, re.compile, pattern, re.I)
106
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000107 def test_bug_3629(self):
108 # A regex that triggered a bug in the sre-code validator
109 re.compile("(?P<quote>)(?(quote))")
110
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000111 def test_sub_template_numeric_escape(self):
112 # bug 776311 and friends
113 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
114 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
115 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
116 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
117 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
118 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
119 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
120
121 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
122 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
123
124 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
125 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
126 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
127 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
128 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
129
130 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
131 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000132
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000133 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
134 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
135 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
136 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
137 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
138 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
139 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
140 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
141 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
142 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
143 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
144 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
145
146 # in python2.3 (etc), these loop endlessly in sre_parser.py
147 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
148 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
149 'xz8')
150 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
151 'xza')
152
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000153 def test_qualified_re_sub(self):
154 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
155 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000156
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000157 def test_bug_114660(self):
158 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
159 'hello there')
160
161 def test_bug_462270(self):
162 # Test for empty sub() behaviour, see SF bug #462270
163 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
164 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
165
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200166 def test_symbolic_groups(self):
167 re.compile('(?P<a>x)(?P=a)(?(a)y)')
168 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
169 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
170 self.assertRaises(re.error, re.compile, '(?Px)')
171 self.assertRaises(re.error, re.compile, '(?P=)')
172 self.assertRaises(re.error, re.compile, '(?P=1)')
173 self.assertRaises(re.error, re.compile, '(?P=a)')
174 self.assertRaises(re.error, re.compile, '(?P=a1)')
175 self.assertRaises(re.error, re.compile, '(?P=a.)')
176 self.assertRaises(re.error, re.compile, '(?P<)')
177 self.assertRaises(re.error, re.compile, '(?P<>)')
178 self.assertRaises(re.error, re.compile, '(?P<1>)')
179 self.assertRaises(re.error, re.compile, '(?P<a.>)')
180 self.assertRaises(re.error, re.compile, '(?())')
181 self.assertRaises(re.error, re.compile, '(?(a))')
182 self.assertRaises(re.error, re.compile, '(?(1a))')
183 self.assertRaises(re.error, re.compile, '(?(a.))')
Georg Brandl1d472b72013-04-14 11:40:00 +0200184 # New valid/invalid identifiers in Python 3
185 re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
186 re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
187 self.assertRaises(re.error, re.compile, '(?P<©>x)')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200188
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000189 def test_symbolic_refs(self):
190 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
191 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
192 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
193 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200194 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
196 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
197 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
198 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000199 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Georg Brandl1d472b72013-04-14 11:40:00 +0200200 # New valid/invalid identifiers in Python 3
201 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
202 self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
203 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000204
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000205 def test_re_subn(self):
206 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
207 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
208 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
209 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
210 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000211
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000212 def test_re_split(self):
213 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
214 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
215 self.assertEqual(re.split("(:*)", ":a:b::c"),
216 ['', ':', 'a', ':', 'b', '::', 'c'])
217 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
218 self.assertEqual(re.split("(:)*", ":a:b::c"),
219 ['', ':', 'a', ':', 'b', ':', 'c'])
220 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
221 ['', ':', 'a', ':b::', 'c'])
222 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
223 ['', None, ':', 'a', None, ':', '', 'b', None, '',
224 None, '::', 'c'])
225 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
226 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000227
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000228 def test_qualified_re_split(self):
229 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
230 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
231 self.assertEqual(re.split("(:)", ":a:b::c", 2),
232 ['', ':', 'a', ':', 'b::c'])
233 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
234 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000235
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000236 def test_re_findall(self):
237 self.assertEqual(re.findall(":+", "abc"), [])
238 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
239 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
240 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
241 (":", ":"),
242 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000243
Skip Montanaro5ba00542003-04-25 16:00:14 +0000244 def test_bug_117612(self):
245 self.assertEqual(re.findall(r"(a|(b))", "aba"),
246 [("a", ""),("b", "b"),("a", "")])
247
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000248 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000249 self.assertEqual(re.match('a', 'a').groups(), ())
250 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
251 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
252 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
253 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000254
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000255 pat = re.compile('((a)|(b))(c)?')
256 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
257 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
258 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
259 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
260 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000261
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000262 # A single group
263 m = re.match('(a)', 'a')
264 self.assertEqual(m.group(0), 'a')
265 self.assertEqual(m.group(0), 'a')
266 self.assertEqual(m.group(1), 'a')
267 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000268
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000269 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
270 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
271 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
272 (None, 'b', None))
273 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000274
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000275 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000276 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
277 ('(', 'a'))
278 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
279 (None, 'a'))
280 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
281 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
282 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
283 ('a', 'b'))
284 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
285 (None, 'd'))
286 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
287 (None, 'd'))
288 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
289 ('a', ''))
290
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000291 # Tests for bug #1177831: exercise groups other than the first group
292 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
293 self.assertEqual(p.match('abc').groups(),
294 ('a', 'b', 'c'))
295 self.assertEqual(p.match('ad').groups(),
296 ('a', None, 'd'))
297 self.assertEqual(p.match('abd'), None)
298 self.assertEqual(p.match('ac'), None)
299
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000300
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000301 def test_re_groupref(self):
302 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
303 ('|', 'a'))
304 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
305 (None, 'a'))
306 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
307 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
308 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
309 ('a', 'a'))
310 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
311 (None, None))
312
313 def test_groupdict(self):
314 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
315 'first second').groupdict(),
316 {'first':'first', 'second':'second'})
317
318 def test_expand(self):
319 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
320 "first second")
321 .expand(r"\2 \1 \g<second> \g<first>"),
322 "second first second first")
323
324 def test_repeat_minmax(self):
325 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
326 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
327 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
328 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
329
330 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
331 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
332 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
333 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
334 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
335 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
336 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
338
339 self.assertEqual(re.match("^x{1}$", "xxx"), None)
340 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
341 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
342 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
343
344 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
345 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
346 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
347 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
348 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
349 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
350 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
352
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000353 self.assertEqual(re.match("^x{}$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
355
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000356 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000357 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000358 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000359 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
360 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
361 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
362 {'first': 1, 'other': 2})
363
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000364 self.assertEqual(re.match("(a)", "a").pos, 0)
365 self.assertEqual(re.match("(a)", "a").endpos, 1)
366 self.assertEqual(re.match("(a)", "a").string, "a")
367 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
368 self.assertNotEqual(re.match("(a)", "a").re, None)
369
370 def test_special_escapes(self):
371 self.assertEqual(re.search(r"\b(b.)\b",
372 "abcd abc bcd bx").group(1), "bx")
373 self.assertEqual(re.search(r"\B(b.)\B",
374 "abc bcd bc abxd").group(1), "bx")
375 self.assertEqual(re.search(r"\b(b.)\b",
376 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
377 self.assertEqual(re.search(r"\B(b.)\B",
378 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
379 self.assertEqual(re.search(r"\b(b.)\b",
380 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
381 self.assertEqual(re.search(r"\B(b.)\B",
382 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
383 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
384 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
385 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
386 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000387 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000388 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000389 "abc bcd bc abxd").group(1), "bx")
390 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
391 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
392 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000393 self.assertEqual(re.search(r"\d\D\w\W\s\S",
394 "1aa! a").group(0), "1aa! a")
395 self.assertEqual(re.search(r"\d\D\w\W\s\S",
396 "1aa! a", re.LOCALE).group(0), "1aa! a")
397 self.assertEqual(re.search(r"\d\D\w\W\s\S",
398 "1aa! a", re.UNICODE).group(0), "1aa! a")
399
Ezio Melotti5a045b92012-02-29 11:48:44 +0200400 def test_string_boundaries(self):
401 # See http://bugs.python.org/issue10713
402 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
403 "abc")
404 # There's a word boundary at the start of a string.
405 self.assertTrue(re.match(r"\b", "abc"))
406 # A non-empty string includes a non-boundary zero-length match.
407 self.assertTrue(re.search(r"\B", "abc"))
408 # There is no non-boundary match at the start of a string.
409 self.assertFalse(re.match(r"\B", "abc"))
410 # However, an empty string contains no word boundaries, and also no
411 # non-boundaries.
412 self.assertEqual(re.search(r"\B", ""), None)
413 # This one is questionable and different from the perlre behaviour,
414 # but describes current behavior.
415 self.assertEqual(re.search(r"\b", ""), None)
416 # A single word-character string has two boundaries, but no
417 # non-boundary gaps.
418 self.assertEqual(len(re.findall(r"\b", "a")), 2)
419 self.assertEqual(len(re.findall(r"\B", "a")), 0)
420 # If there are no words, there are no boundaries
421 self.assertEqual(len(re.findall(r"\b", " ")), 0)
422 self.assertEqual(len(re.findall(r"\b", " ")), 0)
423 # Can match around the whitespace.
424 self.assertEqual(len(re.findall(r"\B", " ")), 2)
425
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000426 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000427 self.assertEqual(re.match("([\u2222\u2223])",
428 "\u2222").group(1), "\u2222")
429 self.assertEqual(re.match("([\u2222\u2223])",
430 "\u2222", re.UNICODE).group(1), "\u2222")
Serhiy Storchakabe80fc92013-10-24 22:02:58 +0300431 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
432 self.assertEqual(re.match(r,
433 "\uff01", re.UNICODE).group(), "\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000434
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100435 def test_big_codesize(self):
436 # Issue #1160
437 r = re.compile('|'.join(('%d'%x for x in range(10000))))
438 self.assertIsNotNone(r.match('1000'))
439 self.assertIsNotNone(r.match('9999'))
440
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000441 def test_anyall(self):
442 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
443 "a\nb")
444 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
445 "a\n\nb")
446
447 def test_non_consuming(self):
448 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
449 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
450 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
451 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
452 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
453 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
454 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
455
456 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
457 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
458 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
459 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
460
461 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000462 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
463 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000464 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
465 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
466 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
467 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
468 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
469 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
470 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
471 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
472
473 def test_category(self):
474 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
475
476 def test_getlower(self):
477 import _sre
478 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
479 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
480 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
481
482 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000483 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000484
485 def test_not_literal(self):
486 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
487 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
488
489 def test_search_coverage(self):
490 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
491 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
492
Ezio Melottid2114eb2011-03-25 14:08:44 +0200493 def assertMatch(self, pattern, text, match=None, span=None,
494 matcher=re.match):
495 if match is None and span is None:
496 # the pattern matches the whole text
497 match = text
498 span = (0, len(text))
499 elif match is None or span is None:
500 raise ValueError('If match is not None, span should be specified '
501 '(and vice versa).')
502 m = matcher(pattern, text)
503 self.assertTrue(m)
504 self.assertEqual(m.group(), match)
505 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000506
Ezio Melottid2114eb2011-03-25 14:08:44 +0200507 def test_re_escape(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300508 alnum_chars = string.ascii_letters + string.digits + '_'
Ezio Melottid2114eb2011-03-25 14:08:44 +0200509 p = ''.join(chr(i) for i in range(256))
510 for c in p:
511 if c in alnum_chars:
512 self.assertEqual(re.escape(c), c)
513 elif c == '\x00':
514 self.assertEqual(re.escape(c), '\\000')
515 else:
516 self.assertEqual(re.escape(c), '\\' + c)
517 self.assertMatch(re.escape(c), c)
518 self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000519
Guido van Rossum698280d2008-09-10 17:44:35 +0000520 def test_re_escape_byte(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300521 alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
Ezio Melottid2114eb2011-03-25 14:08:44 +0200522 p = bytes(range(256))
523 for i in p:
Guido van Rossum698280d2008-09-10 17:44:35 +0000524 b = bytes([i])
Ezio Melottid2114eb2011-03-25 14:08:44 +0200525 if b in alnum_chars:
526 self.assertEqual(re.escape(b), b)
527 elif i == 0:
528 self.assertEqual(re.escape(b), b'\\000')
529 else:
530 self.assertEqual(re.escape(b), b'\\' + b)
531 self.assertMatch(re.escape(b), b)
532 self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000533
Ezio Melotti7b9e97b2011-03-25 14:09:33 +0200534 def test_re_escape_non_ascii(self):
535 s = 'xxx\u2620\u2620\u2620xxx'
536 s_escaped = re.escape(s)
537 self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
538 self.assertMatch(s_escaped, s)
539 self.assertMatch('.%s+.' % re.escape('\u2620'), s,
540 'x\u2620\u2620\u2620x', (2, 7), re.search)
541
542 def test_re_escape_non_ascii_bytes(self):
543 b = 'y\u2620y\u2620y'.encode('utf-8')
544 b_escaped = re.escape(b)
545 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
546 self.assertMatch(b_escaped, b)
547 res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
548 self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000549
Skip Montanaro1e703c62003-04-25 15:40:28 +0000550 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000551 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
552 s = pickle.dumps(oldpat)
553 newpat = pickle.loads(s)
554 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000555
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000556 def test_constants(self):
557 self.assertEqual(re.I, re.IGNORECASE)
558 self.assertEqual(re.L, re.LOCALE)
559 self.assertEqual(re.M, re.MULTILINE)
560 self.assertEqual(re.S, re.DOTALL)
561 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000562
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000563 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000564 for flag in [re.I, re.M, re.X, re.S, re.L]:
565 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000566
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000567 def test_sre_character_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200568 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
569 if i < 256:
570 self.assertIsNotNone(re.match(r"\%03o" % i, chr(i)))
571 self.assertIsNotNone(re.match(r"\%03o0" % i, chr(i)+"0"))
572 self.assertIsNotNone(re.match(r"\%03o8" % i, chr(i)+"8"))
573 self.assertIsNotNone(re.match(r"\x%02x" % i, chr(i)))
574 self.assertIsNotNone(re.match(r"\x%02x0" % i, chr(i)+"0"))
575 self.assertIsNotNone(re.match(r"\x%02xz" % i, chr(i)+"z"))
576 if i < 0x10000:
577 self.assertIsNotNone(re.match(r"\u%04x" % i, chr(i)))
578 self.assertIsNotNone(re.match(r"\u%04x0" % i, chr(i)+"0"))
579 self.assertIsNotNone(re.match(r"\u%04xz" % i, chr(i)+"z"))
580 self.assertIsNotNone(re.match(r"\U%08x" % i, chr(i)))
581 self.assertIsNotNone(re.match(r"\U%08x0" % i, chr(i)+"0"))
582 self.assertIsNotNone(re.match(r"\U%08xz" % i, chr(i)+"z"))
583 self.assertIsNotNone(re.match(r"\0", "\000"))
584 self.assertIsNotNone(re.match(r"\08", "\0008"))
585 self.assertIsNotNone(re.match(r"\01", "\001"))
586 self.assertIsNotNone(re.match(r"\018", "\0018"))
587 self.assertIsNotNone(re.match(r"\567", chr(0o167)))
588 self.assertRaises(re.error, re.match, r"\911", "")
589 self.assertRaises(re.error, re.match, r"\x1", "")
590 self.assertRaises(re.error, re.match, r"\x1z", "")
591 self.assertRaises(re.error, re.match, r"\u123", "")
592 self.assertRaises(re.error, re.match, r"\u123z", "")
593 self.assertRaises(re.error, re.match, r"\U0001234", "")
594 self.assertRaises(re.error, re.match, r"\U0001234z", "")
595 self.assertRaises(re.error, re.match, r"\U00110000", "")
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000596
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000597 def test_sre_character_class_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200598 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
599 if i < 256:
600 self.assertIsNotNone(re.match(r"[\%o]" % i, chr(i)))
601 self.assertIsNotNone(re.match(r"[\%o8]" % i, chr(i)))
602 self.assertIsNotNone(re.match(r"[\%03o]" % i, chr(i)))
603 self.assertIsNotNone(re.match(r"[\%03o0]" % i, chr(i)))
604 self.assertIsNotNone(re.match(r"[\%03o8]" % i, chr(i)))
605 self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
606 self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
607 self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
608 if i < 0x10000:
609 self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
610 self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
611 self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
612 self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
613 self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
614 self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
Ezio Melottieadece22013-02-23 08:40:07 +0200615 self.assertIsNotNone(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200616 self.assertRaises(re.error, re.match, r"[\911]", "")
617 self.assertRaises(re.error, re.match, r"[\x1z]", "")
618 self.assertRaises(re.error, re.match, r"[\u123z]", "")
619 self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
620 self.assertRaises(re.error, re.match, r"[\U00110000]", "")
621
622 def test_sre_byte_literals(self):
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000623 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Antoine Pitrou463badf2012-06-23 13:29:19 +0200624 self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
625 self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
626 self.assertIsNotNone(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
627 self.assertIsNotNone(re.match((r"\x%02x" % i).encode(), bytes([i])))
628 self.assertIsNotNone(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
629 self.assertIsNotNone(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
630 self.assertIsNotNone(re.match(br"\u", b'u'))
631 self.assertIsNotNone(re.match(br"\U", b'U'))
632 self.assertIsNotNone(re.match(br"\0", b"\000"))
633 self.assertIsNotNone(re.match(br"\08", b"\0008"))
634 self.assertIsNotNone(re.match(br"\01", b"\001"))
635 self.assertIsNotNone(re.match(br"\018", b"\0018"))
636 self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
637 self.assertRaises(re.error, re.match, br"\911", b"")
638 self.assertRaises(re.error, re.match, br"\x1", b"")
639 self.assertRaises(re.error, re.match, br"\x1z", b"")
640
641 def test_sre_byte_class_literals(self):
642 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
643 self.assertIsNotNone(re.match((r"[\%o]" % i).encode(), bytes([i])))
644 self.assertIsNotNone(re.match((r"[\%o8]" % i).encode(), bytes([i])))
645 self.assertIsNotNone(re.match((r"[\%03o]" % i).encode(), bytes([i])))
646 self.assertIsNotNone(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
647 self.assertIsNotNone(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
648 self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
649 self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
650 self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
651 self.assertIsNotNone(re.match(br"[\u]", b'u'))
652 self.assertIsNotNone(re.match(br"[\U]", b'U'))
653 self.assertRaises(re.error, re.match, br"[\911]", "")
654 self.assertRaises(re.error, re.match, br"[\x1z]", "")
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000655
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000656 def test_bug_113254(self):
657 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
658 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
659 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
660
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000661 def test_bug_527371(self):
662 # bug described in patches 527371/672491
663 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
664 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
665 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
666 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
667 self.assertEqual(re.match("((a))", "a").lastindex, 1)
668
669 def test_bug_545855(self):
670 # bug 545855 -- This pattern failed to cause a compile error as it
671 # should, instead provoking a TypeError.
672 self.assertRaises(re.error, re.compile, 'foo[a-')
673
674 def test_bug_418626(self):
675 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
676 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
677 # pattern '*?' on a long string.
678 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
679 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
680 20003)
681 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000682 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000683 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000684 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000685
686 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000687 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000688 self.assertEqual(re.compile(pat) and 1, 1)
689
Skip Montanaro1e703c62003-04-25 15:40:28 +0000690 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000691 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000692 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000693 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
694 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
695 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000696
Serhiy Storchakafa468162013-02-16 21:23:53 +0200697 def test_unlimited_zero_width_repeat(self):
698 # Issue #9669
699 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
700 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
701 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
702 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
703 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
704 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
705
Skip Montanaro1e703c62003-04-25 15:40:28 +0000706 def test_scanner(self):
707 def s_ident(scanner, token): return token
708 def s_operator(scanner, token): return "op%s" % token
709 def s_float(scanner, token): return float(token)
710 def s_int(scanner, token): return int(token)
711
712 scanner = Scanner([
713 (r"[a-zA-Z_]\w*", s_ident),
714 (r"\d+\.\d*", s_float),
715 (r"\d+", s_int),
716 (r"=|\+|-|\*|/", s_operator),
717 (r"\s+", None),
718 ])
719
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000720 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
721
Skip Montanaro1e703c62003-04-25 15:40:28 +0000722 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
723 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
724 'op+', 'bar'], ''))
725
Skip Montanaro5ba00542003-04-25 16:00:14 +0000726 def test_bug_448951(self):
727 # bug 448951 (similar to 429357, but with single char match)
728 # (Also test greedy matches.)
729 for op in '','?','*':
730 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
731 (None, None))
732 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
733 ('a:', 'a'))
734
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000735 def test_bug_725106(self):
736 # capturing groups in alternatives in repeats
737 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
738 ('b', 'a'))
739 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
740 ('c', 'b'))
741 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
742 ('b', None))
743 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
744 ('b', None))
745 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
746 ('b', 'a'))
747 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
748 ('c', 'b'))
749 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
750 ('b', None))
751 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
752 ('b', None))
753
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000754 def test_bug_725149(self):
755 # mark_stack_base restoring before restoring marks
756 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
757 ('a', None))
758 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
759 ('a', None, None))
760
Just van Rossum12723ba2003-07-02 20:03:04 +0000761 def test_bug_764548(self):
762 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000763 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000764 pat = re.compile(my_unicode("abc"))
765 self.assertEqual(pat.match("xyz"), None)
766
Skip Montanaro5ba00542003-04-25 16:00:14 +0000767 def test_finditer(self):
768 iter = re.finditer(r":+", "a:b::c:::d")
769 self.assertEqual([item.group(0) for item in iter],
770 [":", "::", ":::"])
771
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600772 pat = re.compile(r":+")
773 iter = pat.finditer("a:b::c:::d", 1, 10)
774 self.assertEqual([item.group(0) for item in iter],
775 [":", "::", ":::"])
776
777 pat = re.compile(r":+")
778 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
779 self.assertEqual([item.group(0) for item in iter],
780 [":", "::", ":::"])
781
782 pat = re.compile(r":+")
783 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
784 self.assertEqual([item.group(0) for item in iter],
785 [":", "::", ":::"])
786
787 pat = re.compile(r":+")
788 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
789 self.assertEqual([item.group(0) for item in iter],
790 ["::", "::"])
791
Thomas Wouters40a088d2008-03-18 20:19:54 +0000792 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000793 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000794 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000795
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000796 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000797 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000798 self.assertEqual(re.compile(pattern).split("a.b.c"),
799 ['a','b','c'])
800
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000801 def test_bug_581080(self):
802 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000803 self.assertEqual(next(iter).span(), (1,2))
804 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000805
806 scanner = re.compile(r"\s").scanner("a b")
807 self.assertEqual(scanner.search().span(), (1, 2))
808 self.assertEqual(scanner.search(), None)
809
810 def test_bug_817234(self):
811 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000812 self.assertEqual(next(iter).span(), (0, 4))
813 self.assertEqual(next(iter).span(), (4, 4))
814 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000815
Mark Dickinson1f268282009-07-28 17:22:36 +0000816 def test_bug_6561(self):
817 # '\d' should match characters in Unicode category 'Nd'
818 # (Number, Decimal Digit), but not those in 'Nl' (Number,
819 # Letter) or 'No' (Number, Other).
820 decimal_digits = [
821 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
822 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
823 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
824 ]
825 for x in decimal_digits:
826 self.assertEqual(re.match('^\d$', x).group(0), x)
827
828 not_decimal_digits = [
829 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
830 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
831 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
832 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
833 ]
834 for x in not_decimal_digits:
835 self.assertIsNone(re.match('^\d$', x))
836
Guido van Rossumd8faa362007-04-27 19:54:29 +0000837 def test_empty_array(self):
838 # SF buf 1647541
839 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000840 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000841 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000842 self.assertEqual(re.compile(b"bla").match(a), None)
843 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000844
Christian Heimes072c0f12008-01-03 23:01:04 +0000845 def test_inline_flags(self):
846 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000847 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
848 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000849
850 p = re.compile(upper_char, re.I | re.U)
851 q = p.match(lower_char)
852 self.assertNotEqual(q, None)
853
854 p = re.compile(lower_char, re.I | re.U)
855 q = p.match(upper_char)
856 self.assertNotEqual(q, None)
857
858 p = re.compile('(?i)' + upper_char, re.U)
859 q = p.match(lower_char)
860 self.assertNotEqual(q, None)
861
862 p = re.compile('(?i)' + lower_char, re.U)
863 q = p.match(upper_char)
864 self.assertNotEqual(q, None)
865
866 p = re.compile('(?iu)' + upper_char)
867 q = p.match(lower_char)
868 self.assertNotEqual(q, None)
869
870 p = re.compile('(?iu)' + lower_char)
871 q = p.match(upper_char)
872 self.assertNotEqual(q, None)
873
Christian Heimes25bb7832008-01-11 16:17:00 +0000874 def test_dollar_matches_twice(self):
875 "$ matches the end of string, and just before the terminating \n"
876 pattern = re.compile('$')
877 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
878 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
879 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
880
881 pattern = re.compile('$', re.MULTILINE)
882 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
883 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
884 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
885
Antoine Pitroufd036452008-08-19 17:56:33 +0000886 def test_bytes_str_mixing(self):
887 # Mixing str and bytes is disallowed
888 pat = re.compile('.')
889 bpat = re.compile(b'.')
890 self.assertRaises(TypeError, pat.match, b'b')
891 self.assertRaises(TypeError, bpat.match, 'b')
892 self.assertRaises(TypeError, pat.sub, b'b', 'c')
893 self.assertRaises(TypeError, pat.sub, 'b', b'c')
894 self.assertRaises(TypeError, pat.sub, b'b', b'c')
895 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
896 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
897 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
898
899 def test_ascii_and_unicode_flag(self):
900 # String patterns
901 for flags in (0, re.UNICODE):
902 pat = re.compile('\xc0', flags | re.IGNORECASE)
903 self.assertNotEqual(pat.match('\xe0'), None)
904 pat = re.compile('\w', flags)
905 self.assertNotEqual(pat.match('\xe0'), None)
906 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
907 self.assertEqual(pat.match('\xe0'), None)
908 pat = re.compile('(?a)\xc0', re.IGNORECASE)
909 self.assertEqual(pat.match('\xe0'), None)
910 pat = re.compile('\w', re.ASCII)
911 self.assertEqual(pat.match('\xe0'), None)
912 pat = re.compile('(?a)\w')
913 self.assertEqual(pat.match('\xe0'), None)
914 # Bytes patterns
915 for flags in (0, re.ASCII):
916 pat = re.compile(b'\xc0', re.IGNORECASE)
917 self.assertEqual(pat.match(b'\xe0'), None)
918 pat = re.compile(b'\w')
919 self.assertEqual(pat.match(b'\xe0'), None)
920 # Incompatibilities
921 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
922 self.assertRaises(ValueError, re.compile, b'(?u)\w')
923 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
924 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
925 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
926 self.assertRaises(ValueError, re.compile, '(?au)\w')
927
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000928 def test_bug_6509(self):
929 # Replacement strings of both types must parse properly.
930 # all strings
931 pat = re.compile('a(\w)')
932 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
933 pat = re.compile('a(.)')
934 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
935 pat = re.compile('..')
936 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
937
938 # all bytes
939 pat = re.compile(b'a(\w)')
940 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
941 pat = re.compile(b'a(.)')
942 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
943 pat = re.compile(b'..')
944 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
945
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000946 def test_dealloc(self):
947 # issue 3299: check for segfault in debug build
948 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +0000949 # the overflow limit is different on wide and narrow builds and it
950 # depends on the definition of SRE_CODE (see sre.h).
951 # 2**128 should be big enough to overflow on both. For smaller values
952 # a RuntimeError is raised instead of OverflowError.
953 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000954 self.assertRaises(TypeError, re.finditer, "a", {})
955 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Victor Stinner5abeafb2010-03-04 21:59:53 +0000956 self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000957
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 def test_search_dot_unicode(self):
959 self.assertIsNotNone(re.search("123.*-", '123abc-'))
960 self.assertIsNotNone(re.search("123.*-", '123\xe9-'))
961 self.assertIsNotNone(re.search("123.*-", '123\u20ac-'))
962 self.assertIsNotNone(re.search("123.*-", '123\U0010ffff-'))
963 self.assertIsNotNone(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
964
Ezio Melottidf723e12012-03-13 01:29:48 +0200965 def test_compile(self):
966 # Test return value when given string and pattern as parameter
967 pattern = re.compile('random pattern')
968 self.assertIsInstance(pattern, re._pattern_type)
969 same_pattern = re.compile(pattern)
970 self.assertIsInstance(same_pattern, re._pattern_type)
971 self.assertIs(same_pattern, pattern)
972 # Test behaviour when not given a string or pattern as parameter
973 self.assertRaises(TypeError, re.compile, 0)
974
Ezio Melottife8e6e72013-01-11 08:32:01 +0200975 def test_bug_13899(self):
976 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
977 # nothing. Ditto B and Z.
978 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
979 ['A', 'B', '\b', 'C', 'Z'])
980
Antoine Pitroub33941a2012-12-03 20:55:56 +0100981 @bigmemtest(size=_2G, memuse=1)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100982 def test_large_search(self, size):
983 # Issue #10182: indices were 32-bit-truncated.
984 s = 'a' * size
985 m = re.search('$', s)
986 self.assertIsNotNone(m)
Antoine Pitrou86067c22012-12-03 21:08:43 +0100987 self.assertEqual(m.start(), size)
988 self.assertEqual(m.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100989
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100990 # The huge memuse is because of re.sub() using a list and a join()
991 # to create the replacement result.
Antoine Pitroub33941a2012-12-03 20:55:56 +0100992 @bigmemtest(size=_2G, memuse=16 + 2)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100993 def test_large_subn(self, size):
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100994 # Issue #10182: indices were 32-bit-truncated.
995 s = 'a' * size
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100996 r, n = re.subn('', '', s)
997 self.assertEqual(r, s)
998 self.assertEqual(n, size + 1)
999
Serhiy Storchakac1b59d42012-12-29 23:38:48 +02001000 def test_bug_16688(self):
1001 # Issue 16688: Backreferences make case-insensitive regex fail on
1002 # non-ASCII strings.
1003 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1004 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001005
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001006 def test_repeat_minmax_overflow(self):
1007 # Issue #13169
1008 string = "x" * 100000
1009 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1010 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1011 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1012 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1013 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1014 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1015 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1016 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1017 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1018 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1019 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1020
1021 @cpython_only
1022 def test_repeat_minmax_overflow_maxrepeat(self):
1023 try:
1024 from _sre import MAXREPEAT
1025 except ImportError:
1026 self.skipTest('requires _sre.MAXREPEAT constant')
1027 string = "x" * 100000
1028 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1029 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1030 (0, 100000))
1031 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1032 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1033 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1034 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1035
R David Murray26dfaac92013-04-14 13:00:54 -04001036 def test_backref_group_name_in_exception(self):
1037 # Issue 17341: Poor error message when compiling invalid regex
1038 with self.assertRaisesRegex(sre_constants.error, '<foo>'):
1039 re.compile('(?P=<foo>)')
1040
1041 def test_group_name_in_exception(self):
1042 # Issue 17341: Poor error message when compiling invalid regex
1043 with self.assertRaisesRegex(sre_constants.error, '\?foo'):
1044 re.compile('(?P<?foo>)')
1045
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001046 def test_issue17998(self):
1047 for reps in '*', '+', '?', '{1}':
1048 for mod in '', '?':
1049 pattern = '.' + reps + mod + 'yz'
1050 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1051 ['xyz'], msg=pattern)
1052 pattern = pattern.encode()
1053 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1054 [b'xyz'], msg=pattern)
1055
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001056
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001057 def test_bug_2537(self):
1058 # issue 2537: empty submatches
1059 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1060 for inner_op in ('{0,}', '*', '?'):
1061 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1062 m = r.match("xyyzy")
1063 self.assertEqual(m.group(0), "xyy")
1064 self.assertEqual(m.group(1), "")
1065 self.assertEqual(m.group(2), "y")
1066
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001067def run_re_tests():
Georg Brandl1b37e872010-03-14 10:45:50 +00001068 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001069 if verbose:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001070 print('Running re_tests test suite')
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001071 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001072 # To save time, only run the first and last 10 tests
1073 #tests = tests[:10] + tests[-10:]
1074 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001075
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001076 for t in tests:
1077 sys.stdout.flush()
1078 pattern = s = outcome = repl = expected = None
1079 if len(t) == 5:
1080 pattern, s, outcome, repl, expected = t
1081 elif len(t) == 3:
1082 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001083 else:
Collin Winter3add4d72007-08-29 23:37:32 +00001084 raise ValueError('Test tuples should have 3 or 5 fields', t)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001085
Guido van Rossum41360a41998-03-26 19:42:58 +00001086 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001087 obj = re.compile(pattern)
1088 except re.error:
1089 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +00001090 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001091 print('=== Syntax error:', t)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001092 except KeyboardInterrupt: raise KeyboardInterrupt
1093 except:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001094 print('*** Unexpected error ***', t)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001095 if verbose:
1096 traceback.print_exc(file=sys.stdout)
1097 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +00001098 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001099 result = obj.search(s)
Guido van Rossumb940e112007-01-10 16:19:56 +00001100 except re.error as msg:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001101 print('=== Unexpected exception', t, repr(msg))
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001102 if outcome == SYNTAX_ERROR:
1103 # This should have been a syntax error; forget it.
1104 pass
1105 elif outcome == FAIL:
1106 if result is None: pass # No match, as expected
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001107 else: print('=== Succeeded incorrectly', t)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001108 elif outcome == SUCCEED:
1109 if result is not None:
1110 # Matched, as expected, so now we compute the
1111 # result string and compare it to our expected result.
1112 start, end = result.span(0)
1113 vardict={'found': result.group(0),
1114 'groups': result.group(),
1115 'flags': result.re.flags}
1116 for i in range(1, 100):
1117 try:
1118 gi = result.group(i)
1119 # Special hack because else the string concat fails:
1120 if gi is None:
1121 gi = "None"
1122 except IndexError:
1123 gi = "Error"
1124 vardict['g%d' % i] = gi
1125 for i in result.re.groupindex.keys():
1126 try:
1127 gi = result.group(i)
1128 if gi is None:
1129 gi = "None"
1130 except IndexError:
1131 gi = "Error"
1132 vardict[i] = gi
1133 repl = eval(repl, vardict)
1134 if repl != expected:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001135 print('=== grouping error', t, end=' ')
1136 print(repr(repl) + ' should be ' + repr(expected))
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001137 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001138 print('=== Failed incorrectly', t)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001139
Antoine Pitrou22628c42008-07-22 17:53:22 +00001140 # Try the match with both pattern and string converted to
1141 # bytes, and check that it still succeeds.
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001142 try:
Antoine Pitrou22628c42008-07-22 17:53:22 +00001143 bpat = bytes(pattern, "ascii")
1144 bs = bytes(s, "ascii")
1145 except UnicodeEncodeError:
1146 # skip non-ascii tests
1147 pass
1148 else:
1149 try:
1150 bpat = re.compile(bpat)
1151 except Exception:
1152 print('=== Fails on bytes pattern compile', t)
1153 if verbose:
1154 traceback.print_exc(file=sys.stdout)
1155 else:
1156 bytes_result = bpat.search(bs)
1157 if bytes_result is None:
1158 print('=== Fails on bytes pattern match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001159
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001160 # Try the match with the search area limited to the extent
1161 # of the match and see if it still succeeds. \B will
1162 # break (because it won't match at the end or start of a
1163 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001164
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001165 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1166 and result is not None:
1167 obj = re.compile(pattern)
1168 result = obj.search(s, result.start(0), result.end(0) + 1)
1169 if result is None:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001170 print('=== Failed on range-limited match', t)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001171
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001172 # Try the match with IGNORECASE enabled, and check that it
1173 # still succeeds.
1174 obj = re.compile(pattern, re.IGNORECASE)
1175 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001176 if result is None:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001177 print('=== Fails on case-insensitive match', t)
Guido van Rossumdfa67901997-12-08 17:12:06 +00001178
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001179 # Try the match with LOCALE enabled, and check that it
1180 # still succeeds.
Antoine Pitrou22628c42008-07-22 17:53:22 +00001181 if '(?u)' not in pattern:
1182 obj = re.compile(pattern, re.LOCALE)
1183 result = obj.search(s)
1184 if result is None:
1185 print('=== Fails on locale-sensitive match', t)
Guido van Rossumdfa67901997-12-08 17:12:06 +00001186
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001187 # Try the match with UNICODE locale enabled, and check
1188 # that it still succeeds.
1189 obj = re.compile(pattern, re.UNICODE)
1190 result = obj.search(s)
1191 if result is None:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001192 print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001193
Gregory P. Smith5a631832010-07-27 05:31:29 +00001194
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001195def test_main():
Walter Dörwald21d3a322003-05-01 17:45:56 +00001196 run_unittest(ReTests)
Skip Montanaro1e703c62003-04-25 15:40:28 +00001197 run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001198
1199if __name__ == "__main__":
1200 test_main()