blob: fe71c8477e2b22d4aab0cac2d30616035eb877cc [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Antoine Pitrouf5814112014-02-03 20:59:59 +01003from test.test_support import captured_stdout
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00005from re import Scanner
R David Murray60773392013-04-14 13:08:50 -04006import sre_constants
Ezio Melotti46645632011-03-25 14:50:52 +02007import sys
8import string
9import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +000010from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000011
Antoine Pitrou735f36e2012-12-03 20:53:12 +010012
Guido van Rossum23b22571997-07-17 22:36:14 +000013# Misc tests from Tim Peters' re.doc
14
Just van Rossum6802c6e2003-07-02 14:36:59 +000015# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020016# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000017# cover most of the code.
18
Skip Montanaro8ed06da2003-04-24 19:43:18 +000019import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000020
Skip Montanaro8ed06da2003-04-24 19:43:18 +000021class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000022
23 def test_weakref(self):
24 s = 'QabbbcR'
25 x = re.compile('ab+c')
26 y = proxy(x)
27 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
28
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029 def test_search_star_plus(self):
30 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
31 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
32 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
33 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000034 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000035 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
36 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
37 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
38 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000039 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000040
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000042 int_value = int(matchobj.group(0))
43 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000044
Skip Montanaro8ed06da2003-04-24 19:43:18 +000045 def test_basic_re_sub(self):
46 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
47 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
48 '9.3 -3 24x100y')
49 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
50 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
53 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000054
Skip Montanaro8ed06da2003-04-24 19:43:18 +000055 s = r"\1\1"
56 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
57 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
58 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000059
Skip Montanaro8ed06da2003-04-24 19:43:18 +000060 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
61 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
62 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
63 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000064
Skip Montanaro8ed06da2003-04-24 19:43:18 +000065 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
66 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
67 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
68 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
69 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000070
Skip Montanaro8ed06da2003-04-24 19:43:18 +000071 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000072
Skip Montanaro2726fcd2003-04-25 14:31:54 +000073 def test_bug_449964(self):
74 # fails for group followed by other escape
75 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
76 'xx\bxx\b')
77
78 def test_bug_449000(self):
79 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000080 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
84 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
85 'abc\ndef\n')
86 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
87 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000088
Guido van Rossum1ff91d92007-09-10 22:02:25 +000089 def test_bug_1140(self):
90 # re.sub(x, y, u'') should return u'', not '', and
91 # re.sub(x, y, '') should return '', not u''.
92 # Also:
93 # re.sub(x, y, unicode(x)) should return unicode(y), and
94 # re.sub(x, y, str(x)) should return
95 # str(y) if isinstance(y, str) else unicode(y).
96 for x in 'x', u'x':
97 for y in 'y', u'y':
98 z = re.sub(x, y, u'')
99 self.assertEqual(z, u'')
100 self.assertEqual(type(z), unicode)
101 #
102 z = re.sub(x, y, '')
103 self.assertEqual(z, '')
104 self.assertEqual(type(z), str)
105 #
106 z = re.sub(x, y, unicode(x))
107 self.assertEqual(z, y)
108 self.assertEqual(type(z), unicode)
109 #
110 z = re.sub(x, y, str(x))
111 self.assertEqual(z, y)
112 self.assertEqual(type(z), type(y))
113
Raymond Hettinger80016c92007-12-19 18:13:31 +0000114 def test_bug_1661(self):
115 # Verify that flags do not get silently ignored with compiled patterns
116 pattern = re.compile('.')
117 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
118 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
119 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
120 self.assertRaises(ValueError, re.compile, pattern, re.I)
121
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000122 def test_bug_3629(self):
123 # A regex that triggered a bug in the sre-code validator
124 re.compile("(?P<quote>)(?(quote))")
125
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000126 def test_sub_template_numeric_escape(self):
127 # bug 776311 and friends
128 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
129 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
130 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
131 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
132 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
133 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
134 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
135
136 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
137 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
138
139 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
140 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
141 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
142 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
143 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
144
145 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
146 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000147
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000148 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
153 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
155 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
156 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
157 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
158 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
159 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
160
161 # in python2.3 (etc), these loop endlessly in sre_parser.py
162 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
163 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
164 'xz8')
165 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
166 'xza')
167
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000168 def test_qualified_re_sub(self):
169 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
170 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000171
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000172 def test_bug_114660(self):
173 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
174 'hello there')
175
176 def test_bug_462270(self):
177 # Test for empty sub() behaviour, see SF bug #462270
178 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
179 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
180
Ezio Melottief317382012-11-03 20:31:12 +0200181 def test_symbolic_groups(self):
182 re.compile('(?P<a>x)(?P=a)(?(a)y)')
183 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
184 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
185 self.assertRaises(re.error, re.compile, '(?Px)')
186 self.assertRaises(re.error, re.compile, '(?P=)')
187 self.assertRaises(re.error, re.compile, '(?P=1)')
188 self.assertRaises(re.error, re.compile, '(?P=a)')
189 self.assertRaises(re.error, re.compile, '(?P=a1)')
190 self.assertRaises(re.error, re.compile, '(?P=a.)')
191 self.assertRaises(re.error, re.compile, '(?P<)')
192 self.assertRaises(re.error, re.compile, '(?P<>)')
193 self.assertRaises(re.error, re.compile, '(?P<1>)')
194 self.assertRaises(re.error, re.compile, '(?P<a.>)')
195 self.assertRaises(re.error, re.compile, '(?())')
196 self.assertRaises(re.error, re.compile, '(?(a))')
197 self.assertRaises(re.error, re.compile, '(?(1a))')
198 self.assertRaises(re.error, re.compile, '(?(a.))')
199
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000200 def test_symbolic_refs(self):
201 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
203 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
204 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melottief317382012-11-03 20:31:12 +0200205 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000206 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
207 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
208 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
209 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000210 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000211
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000212 def test_re_subn(self):
213 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
214 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
215 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
216 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
217 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000218
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000219 def test_re_split(self):
220 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
221 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
222 self.assertEqual(re.split("(:*)", ":a:b::c"),
223 ['', ':', 'a', ':', 'b', '::', 'c'])
224 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
225 self.assertEqual(re.split("(:)*", ":a:b::c"),
226 ['', ':', 'a', ':', 'b', ':', 'c'])
227 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
228 ['', ':', 'a', ':b::', 'c'])
229 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
230 ['', None, ':', 'a', None, ':', '', 'b', None, '',
231 None, '::', 'c'])
232 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
233 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000234
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000235 def test_qualified_re_split(self):
236 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
237 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
238 self.assertEqual(re.split("(:)", ":a:b::c", 2),
239 ['', ':', 'a', ':', 'b::c'])
240 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
241 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000242
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000243 def test_re_findall(self):
244 self.assertEqual(re.findall(":+", "abc"), [])
245 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
246 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
247 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
248 (":", ":"),
249 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000250
Skip Montanaro5ba00542003-04-25 16:00:14 +0000251 def test_bug_117612(self):
252 self.assertEqual(re.findall(r"(a|(b))", "aba"),
253 [("a", ""),("b", "b"),("a", "")])
254
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000255 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000256 self.assertEqual(re.match('a', 'a').groups(), ())
257 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
258 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
259 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
260 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000261
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000262 pat = re.compile('((a)|(b))(c)?')
263 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
264 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
265 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
266 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
267 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000268
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000269 # A single group
270 m = re.match('(a)', 'a')
271 self.assertEqual(m.group(0), 'a')
272 self.assertEqual(m.group(0), 'a')
273 self.assertEqual(m.group(1), 'a')
274 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000275
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000276 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
277 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
278 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
279 (None, 'b', None))
280 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000281
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000282 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000283 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
284 ('(', 'a'))
285 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
286 (None, 'a'))
287 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
288 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
289 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
290 ('a', 'b'))
291 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
292 (None, 'd'))
293 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
294 (None, 'd'))
295 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
296 ('a', ''))
297
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000298 # Tests for bug #1177831: exercise groups other than the first group
299 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
300 self.assertEqual(p.match('abc').groups(),
301 ('a', 'b', 'c'))
302 self.assertEqual(p.match('ad').groups(),
303 ('a', None, 'd'))
304 self.assertEqual(p.match('abd'), None)
305 self.assertEqual(p.match('ac'), None)
306
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000307
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000308 def test_re_groupref(self):
309 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
310 ('|', 'a'))
311 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
312 (None, 'a'))
313 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
314 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
315 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
316 ('a', 'a'))
317 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
318 (None, None))
319
320 def test_groupdict(self):
321 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
322 'first second').groupdict(),
323 {'first':'first', 'second':'second'})
324
325 def test_expand(self):
326 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
327 "first second")
328 .expand(r"\2 \1 \g<second> \g<first>"),
329 "second first second first")
330
331 def test_repeat_minmax(self):
332 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
333 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
334 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
335 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
336
337 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
344 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
345
346 self.assertEqual(re.match("^x{1}$", "xxx"), None)
347 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
348 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
349 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
350
351 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
355 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
356 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
357 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
358 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
359
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000360 self.assertEqual(re.match("^x{}$", "xxx"), None)
361 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
362
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000363 def test_getattr(self):
364 self.assertEqual(re.match("(a)", "a").pos, 0)
365 self.assertEqual(re.match("(a)", "a").endpos, 1)
366 self.assertEqual(re.match("(a)", "a").string, "a")
367 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
368 self.assertNotEqual(re.match("(a)", "a").re, None)
369
370 def test_special_escapes(self):
371 self.assertEqual(re.search(r"\b(b.)\b",
372 "abcd abc bcd bx").group(1), "bx")
373 self.assertEqual(re.search(r"\B(b.)\B",
374 "abc bcd bc abxd").group(1), "bx")
375 self.assertEqual(re.search(r"\b(b.)\b",
376 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
377 self.assertEqual(re.search(r"\B(b.)\B",
378 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
379 self.assertEqual(re.search(r"\b(b.)\b",
380 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
381 self.assertEqual(re.search(r"\B(b.)\B",
382 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
383 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
384 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
385 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
386 self.assertEqual(re.search(r"\b(b.)\b",
387 u"abcd abc bcd bx").group(1), "bx")
388 self.assertEqual(re.search(r"\B(b.)\B",
389 u"abc bcd bc abxd").group(1), "bx")
390 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
391 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
392 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
393 self.assertEqual(re.search(r"\d\D\w\W\s\S",
394 "1aa! a").group(0), "1aa! a")
395 self.assertEqual(re.search(r"\d\D\w\W\s\S",
396 "1aa! a", re.LOCALE).group(0), "1aa! a")
397 self.assertEqual(re.search(r"\d\D\w\W\s\S",
398 "1aa! a", re.UNICODE).group(0), "1aa! a")
399
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200400 def test_string_boundaries(self):
401 # See http://bugs.python.org/issue10713
402 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
403 "abc")
404 # There's a word boundary at the start of a string.
405 self.assertTrue(re.match(r"\b", "abc"))
406 # A non-empty string includes a non-boundary zero-length match.
407 self.assertTrue(re.search(r"\B", "abc"))
408 # There is no non-boundary match at the start of a string.
409 self.assertFalse(re.match(r"\B", "abc"))
410 # However, an empty string contains no word boundaries, and also no
411 # non-boundaries.
412 self.assertEqual(re.search(r"\B", ""), None)
413 # This one is questionable and different from the perlre behaviour,
414 # but describes current behavior.
415 self.assertEqual(re.search(r"\b", ""), None)
416 # A single word-character string has two boundaries, but no
417 # non-boundary gaps.
418 self.assertEqual(len(re.findall(r"\b", "a")), 2)
419 self.assertEqual(len(re.findall(r"\B", "a")), 0)
420 # If there are no words, there are no boundaries
421 self.assertEqual(len(re.findall(r"\b", " ")), 0)
422 self.assertEqual(len(re.findall(r"\b", " ")), 0)
423 # Can match around the whitespace.
424 self.assertEqual(len(re.findall(r"\B", " ")), 2)
425
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000426 def test_bigcharset(self):
427 self.assertEqual(re.match(u"([\u2222\u2223])",
428 u"\u2222").group(1), u"\u2222")
429 self.assertEqual(re.match(u"([\u2222\u2223])",
430 u"\u2222", re.UNICODE).group(1), u"\u2222")
Serhiy Storchaka22fb0de2013-10-24 22:02:42 +0300431 r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
432 self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000433
Antoine Pitroub83ea142012-11-20 22:30:42 +0100434 def test_big_codesize(self):
435 # Issue #1160
436 r = re.compile('|'.join(('%d'%x for x in range(10000))))
437 self.assertIsNotNone(r.match('1000'))
438 self.assertIsNotNone(r.match('9999'))
439
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000440 def test_anyall(self):
441 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
442 "a\nb")
443 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
444 "a\n\nb")
445
446 def test_non_consuming(self):
447 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
448 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
449 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
450 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
451 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
452 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
453 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
454
455 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
456 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
457 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
458 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
459
460 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000461 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
462 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000463 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
464 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
465 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
466 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
467 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
468 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
469 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
470 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
471
472 def test_category(self):
473 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
474
475 def test_getlower(self):
476 import _sre
477 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
478 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
479 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
480
481 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
482 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
483
484 def test_not_literal(self):
485 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
486 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
487
488 def test_search_coverage(self):
489 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
490 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
491
Ezio Melotti46645632011-03-25 14:50:52 +0200492 def assertMatch(self, pattern, text, match=None, span=None,
493 matcher=re.match):
494 if match is None and span is None:
495 # the pattern matches the whole text
496 match = text
497 span = (0, len(text))
498 elif match is None or span is None:
499 raise ValueError('If match is not None, span should be specified '
500 '(and vice versa).')
501 m = matcher(pattern, text)
502 self.assertTrue(m)
503 self.assertEqual(m.group(), match)
504 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000505
Ezio Melotti46645632011-03-25 14:50:52 +0200506 def test_re_escape(self):
507 alnum_chars = string.ascii_letters + string.digits
508 p = u''.join(unichr(i) for i in range(256))
509 for c in p:
510 if c in alnum_chars:
511 self.assertEqual(re.escape(c), c)
512 elif c == u'\x00':
513 self.assertEqual(re.escape(c), u'\\000')
514 else:
515 self.assertEqual(re.escape(c), u'\\' + c)
516 self.assertMatch(re.escape(c), c)
517 self.assertMatch(re.escape(p), p)
518
519 def test_re_escape_byte(self):
520 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
521 p = ''.join(chr(i) for i in range(256))
522 for b in p:
523 if b in alnum_chars:
524 self.assertEqual(re.escape(b), b)
525 elif b == b'\x00':
526 self.assertEqual(re.escape(b), b'\\000')
527 else:
528 self.assertEqual(re.escape(b), b'\\' + b)
529 self.assertMatch(re.escape(b), b)
530 self.assertMatch(re.escape(p), p)
531
532 def test_re_escape_non_ascii(self):
533 s = u'xxx\u2620\u2620\u2620xxx'
534 s_escaped = re.escape(s)
535 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
536 self.assertMatch(s_escaped, s)
537 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
538 u'x\u2620\u2620\u2620x', (2, 7), re.search)
539
540 def test_re_escape_non_ascii_bytes(self):
541 b = u'y\u2620y\u2620y'.encode('utf-8')
542 b_escaped = re.escape(b)
543 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
544 self.assertMatch(b_escaped, b)
545 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
546 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000547
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000548 def test_pickling(self):
549 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000550 self.pickle_test(pickle)
551 import cPickle
552 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000553 # old pickles expect the _compile() reconstructor in sre module
Florent Xicluna6257a7b2010-03-31 22:01:03 +0000554 import_module("sre", deprecated=True)
555 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000556
557 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000558 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
559 s = pickle.dumps(oldpat)
560 newpat = pickle.loads(s)
561 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000562
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000563 def test_constants(self):
564 self.assertEqual(re.I, re.IGNORECASE)
565 self.assertEqual(re.L, re.LOCALE)
566 self.assertEqual(re.M, re.MULTILINE)
567 self.assertEqual(re.S, re.DOTALL)
568 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000569
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000570 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000571 for flag in [re.I, re.M, re.X, re.S, re.L]:
572 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000573
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000574 def test_sre_character_literals(self):
575 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
576 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
577 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
578 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
579 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
580 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
581 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
582 self.assertRaises(re.error, re.match, "\911", "")
583
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000584 def test_sre_character_class_literals(self):
585 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
586 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
587 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
588 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
589 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
590 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
591 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
592 self.assertRaises(re.error, re.match, "[\911]", "")
593
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000594 def test_bug_113254(self):
595 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
596 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
597 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
598
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000599 def test_bug_527371(self):
600 # bug described in patches 527371/672491
601 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
602 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
603 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
604 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
605 self.assertEqual(re.match("((a))", "a").lastindex, 1)
606
607 def test_bug_545855(self):
608 # bug 545855 -- This pattern failed to cause a compile error as it
609 # should, instead provoking a TypeError.
610 self.assertRaises(re.error, re.compile, 'foo[a-')
611
612 def test_bug_418626(self):
613 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
614 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
615 # pattern '*?' on a long string.
616 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
617 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
618 20003)
619 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000620 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000621 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000622 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000623
624 def test_bug_612074(self):
625 pat=u"["+re.escape(u"\u2039")+u"]"
626 self.assertEqual(re.compile(pat) and 1, 1)
627
Skip Montanaro1e703c62003-04-25 15:40:28 +0000628 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000629 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000630 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000631 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
632 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
633 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000634
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200635 def test_unlimited_zero_width_repeat(self):
636 # Issue #9669
637 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
638 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
639 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
640 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
641 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
642 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
643
Skip Montanaro1e703c62003-04-25 15:40:28 +0000644 def test_scanner(self):
645 def s_ident(scanner, token): return token
646 def s_operator(scanner, token): return "op%s" % token
647 def s_float(scanner, token): return float(token)
648 def s_int(scanner, token): return int(token)
649
650 scanner = Scanner([
651 (r"[a-zA-Z_]\w*", s_ident),
652 (r"\d+\.\d*", s_float),
653 (r"\d+", s_int),
654 (r"=|\+|-|\*|/", s_operator),
655 (r"\s+", None),
656 ])
657
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000658 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
659
Skip Montanaro1e703c62003-04-25 15:40:28 +0000660 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
661 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
662 'op+', 'bar'], ''))
663
Skip Montanaro5ba00542003-04-25 16:00:14 +0000664 def test_bug_448951(self):
665 # bug 448951 (similar to 429357, but with single char match)
666 # (Also test greedy matches.)
667 for op in '','?','*':
668 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
669 (None, None))
670 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
671 ('a:', 'a'))
672
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000673 def test_bug_725106(self):
674 # capturing groups in alternatives in repeats
675 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
676 ('b', 'a'))
677 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
678 ('c', 'b'))
679 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
680 ('b', None))
681 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
682 ('b', None))
683 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
684 ('b', 'a'))
685 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
686 ('c', 'b'))
687 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
688 ('b', None))
689 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
690 ('b', None))
691
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000692 def test_bug_725149(self):
693 # mark_stack_base restoring before restoring marks
694 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
695 ('a', None))
696 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
697 ('a', None, None))
698
Just van Rossum12723ba2003-07-02 20:03:04 +0000699 def test_bug_764548(self):
700 # bug 764548, re.compile() barfs on str/unicode subclasses
701 try:
702 unicode
703 except NameError:
Zachary Ware1f702212013-12-10 14:09:20 -0600704 self.skipTest('no problem if we have no unicode')
Just van Rossum12723ba2003-07-02 20:03:04 +0000705 class my_unicode(unicode): pass
706 pat = re.compile(my_unicode("abc"))
707 self.assertEqual(pat.match("xyz"), None)
708
Skip Montanaro5ba00542003-04-25 16:00:14 +0000709 def test_finditer(self):
710 iter = re.finditer(r":+", "a:b::c:::d")
711 self.assertEqual([item.group(0) for item in iter],
712 [":", "::", ":::"])
713
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000714 def test_bug_926075(self):
715 try:
716 unicode
717 except NameError:
Zachary Ware1f702212013-12-10 14:09:20 -0600718 self.skipTest('no problem if we have no unicode')
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000719 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000720 re.compile(eval("u'bug_926075'")))
721
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000722 def test_bug_931848(self):
723 try:
724 unicode
725 except NameError:
Zachary Ware1f702212013-12-10 14:09:20 -0600726 self.skipTest('no problem if we have no unicode')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000727 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
728 self.assertEqual(re.compile(pattern).split("a.b.c"),
729 ['a','b','c'])
730
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000731 def test_bug_581080(self):
732 iter = re.finditer(r"\s", "a b")
733 self.assertEqual(iter.next().span(), (1,2))
734 self.assertRaises(StopIteration, iter.next)
735
736 scanner = re.compile(r"\s").scanner("a b")
737 self.assertEqual(scanner.search().span(), (1, 2))
738 self.assertEqual(scanner.search(), None)
739
740 def test_bug_817234(self):
741 iter = re.finditer(r".*", "asdf")
742 self.assertEqual(iter.next().span(), (0, 4))
743 self.assertEqual(iter.next().span(), (4, 4))
744 self.assertRaises(StopIteration, iter.next)
745
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000746 def test_bug_6561(self):
747 # '\d' should match characters in Unicode category 'Nd'
748 # (Number, Decimal Digit), but not those in 'Nl' (Number,
749 # Letter) or 'No' (Number, Other).
750 decimal_digits = [
751 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
752 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
753 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
754 ]
755 for x in decimal_digits:
756 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
757
758 not_decimal_digits = [
759 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
760 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
761 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
762 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
763 ]
764 for x in not_decimal_digits:
765 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
766
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000767 def test_empty_array(self):
768 # SF buf 1647541
769 import array
770 for typecode in 'cbBuhHiIlLfd':
771 a = array.array(typecode)
772 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000773 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000774
Guido van Rossumae04c332008-01-03 19:12:44 +0000775 def test_inline_flags(self):
776 # Bug #1700
777 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
778 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
779
780 p = re.compile(upper_char, re.I | re.U)
781 q = p.match(lower_char)
782 self.assertNotEqual(q, None)
783
784 p = re.compile(lower_char, re.I | re.U)
785 q = p.match(upper_char)
786 self.assertNotEqual(q, None)
787
788 p = re.compile('(?i)' + upper_char, re.U)
789 q = p.match(lower_char)
790 self.assertNotEqual(q, None)
791
792 p = re.compile('(?i)' + lower_char, re.U)
793 q = p.match(upper_char)
794 self.assertNotEqual(q, None)
795
796 p = re.compile('(?iu)' + upper_char)
797 q = p.match(lower_char)
798 self.assertNotEqual(q, None)
799
800 p = re.compile('(?iu)' + lower_char)
801 q = p.match(upper_char)
802 self.assertNotEqual(q, None)
803
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000804 def test_dollar_matches_twice(self):
805 "$ matches the end of string, and just before the terminating \n"
806 pattern = re.compile('$')
807 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
808 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
809 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
810
811 pattern = re.compile('$', re.MULTILINE)
812 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
813 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
814 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
815
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000816 def test_dealloc(self):
817 # issue 3299: check for segfault in debug build
818 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000819 # the overflow limit is different on wide and narrow builds and it
820 # depends on the definition of SRE_CODE (see sre.h).
821 # 2**128 should be big enough to overflow on both. For smaller values
822 # a RuntimeError is raised instead of OverflowError.
823 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000824 self.assertRaises(TypeError, re.finditer, "a", {})
825 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000826
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200827 def test_compile(self):
828 # Test return value when given string and pattern as parameter
829 pattern = re.compile('random pattern')
830 self.assertIsInstance(pattern, re._pattern_type)
831 same_pattern = re.compile(pattern)
832 self.assertIsInstance(same_pattern, re._pattern_type)
833 self.assertIs(same_pattern, pattern)
834 # Test behaviour when not given a string or pattern as parameter
835 self.assertRaises(TypeError, re.compile, 0)
836
Ezio Melotti5c4e32b2013-01-11 08:32:01 +0200837 def test_bug_13899(self):
838 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
839 # nothing. Ditto B and Z.
840 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
841 ['A', 'B', '\b', 'C', 'Z'])
842
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100843 @precisionbigmemtest(size=_2G, memuse=1)
844 def test_large_search(self, size):
845 # Issue #10182: indices were 32-bit-truncated.
846 s = 'a' * size
847 m = re.search('$', s)
848 self.assertIsNotNone(m)
Antoine Pitrou74635c92012-12-03 21:08:43 +0100849 self.assertEqual(m.start(), size)
850 self.assertEqual(m.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100851
Antoine Pitroub83575b2012-12-02 12:52:36 +0100852 # The huge memuse is because of re.sub() using a list and a join()
853 # to create the replacement result.
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100854 @precisionbigmemtest(size=_2G, memuse=16 + 2)
855 def test_large_subn(self, size):
Antoine Pitroub83575b2012-12-02 12:52:36 +0100856 # Issue #10182: indices were 32-bit-truncated.
857 s = 'a' * size
Antoine Pitroub83575b2012-12-02 12:52:36 +0100858 r, n = re.subn('', '', s)
859 self.assertEqual(r, s)
860 self.assertEqual(n, size + 1)
861
862
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200863 def test_repeat_minmax_overflow(self):
864 # Issue #13169
865 string = "x" * 100000
866 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
867 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
868 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
869 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
870 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
871 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
872 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
873 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
874 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
875 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
876 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
877
878 @cpython_only
879 def test_repeat_minmax_overflow_maxrepeat(self):
880 try:
881 from _sre import MAXREPEAT
882 except ImportError:
883 self.skipTest('requires _sre.MAXREPEAT constant')
884 string = "x" * 100000
885 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
886 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
887 (0, 100000))
888 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
889 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
890 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
891 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
892
R David Murray60773392013-04-14 13:08:50 -0400893 def test_backref_group_name_in_exception(self):
894 # Issue 17341: Poor error message when compiling invalid regex
895 with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
896 re.compile('(?P=<foo>)')
897
898 def test_group_name_in_exception(self):
899 # Issue 17341: Poor error message when compiling invalid regex
900 with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
901 re.compile('(?P<?foo>)')
902
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300903 def test_issue17998(self):
904 for reps in '*', '+', '?', '{1}':
905 for mod in '', '?':
906 pattern = '.' + reps + mod + 'yz'
907 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
908 ['xyz'], msg=pattern)
909 pattern = pattern.encode()
910 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
911 [b'xyz'], msg=pattern)
912
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200913
Serhiy Storchaka83737c62013-08-19 23:20:07 +0300914 def test_bug_2537(self):
915 # issue 2537: empty submatches
916 for outer_op in ('{0,}', '*', '+', '{1,187}'):
917 for inner_op in ('{0,}', '*', '?'):
918 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
919 m = r.match("xyyzy")
920 self.assertEqual(m.group(0), "xyy")
921 self.assertEqual(m.group(1), "")
922 self.assertEqual(m.group(2), "y")
923
Antoine Pitrouf5814112014-02-03 20:59:59 +0100924 def test_debug_flag(self):
925 with captured_stdout() as out:
926 re.compile('foo', re.DEBUG)
927 self.assertEqual(out.getvalue().splitlines(),
928 ['literal 102', 'literal 111', 'literal 111'])
929 # Debug output is output again even a second time (bypassing
930 # the cache -- issue #20426).
931 with captured_stdout() as out:
932 re.compile('foo', re.DEBUG)
933 self.assertEqual(out.getvalue().splitlines(),
934 ['literal 102', 'literal 111', 'literal 111'])
935
936
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000937def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000938 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000939 if verbose:
940 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000941 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000942 # To save time, only run the first and last 10 tests
943 #tests = tests[:10] + tests[-10:]
944 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000945
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000946 for t in tests:
947 sys.stdout.flush()
948 pattern = s = outcome = repl = expected = None
949 if len(t) == 5:
950 pattern, s, outcome, repl, expected = t
951 elif len(t) == 3:
952 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000953 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000954 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
955
Guido van Rossum41360a41998-03-26 19:42:58 +0000956 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000957 obj = re.compile(pattern)
958 except re.error:
959 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000960 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000961 print '=== Syntax error:', t
962 except KeyboardInterrupt: raise KeyboardInterrupt
963 except:
964 print '*** Unexpected error ***', t
965 if verbose:
966 traceback.print_exc(file=sys.stdout)
967 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000968 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000969 result = obj.search(s)
970 except re.error, msg:
971 print '=== Unexpected exception', t, repr(msg)
972 if outcome == SYNTAX_ERROR:
973 # This should have been a syntax error; forget it.
974 pass
975 elif outcome == FAIL:
976 if result is None: pass # No match, as expected
977 else: print '=== Succeeded incorrectly', t
978 elif outcome == SUCCEED:
979 if result is not None:
980 # Matched, as expected, so now we compute the
981 # result string and compare it to our expected result.
982 start, end = result.span(0)
983 vardict={'found': result.group(0),
984 'groups': result.group(),
985 'flags': result.re.flags}
986 for i in range(1, 100):
987 try:
988 gi = result.group(i)
989 # Special hack because else the string concat fails:
990 if gi is None:
991 gi = "None"
992 except IndexError:
993 gi = "Error"
994 vardict['g%d' % i] = gi
995 for i in result.re.groupindex.keys():
996 try:
997 gi = result.group(i)
998 if gi is None:
999 gi = "None"
1000 except IndexError:
1001 gi = "Error"
1002 vardict[i] = gi
1003 repl = eval(repl, vardict)
1004 if repl != expected:
1005 print '=== grouping error', t,
1006 print repr(repl) + ' should be ' + repr(expected)
1007 else:
1008 print '=== Failed incorrectly', t
1009
1010 # Try the match on a unicode string, and check that it
1011 # still succeeds.
1012 try:
1013 result = obj.search(unicode(s, "latin-1"))
1014 if result is None:
1015 print '=== Fails on unicode match', t
1016 except NameError:
1017 continue # 1.5.2
1018 except TypeError:
1019 continue # unicode test case
1020
1021 # Try the match on a unicode pattern, and check that it
1022 # still succeeds.
1023 obj=re.compile(unicode(pattern, "latin-1"))
1024 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +00001025 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001026 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001027
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001028 # Try the match with the search area limited to the extent
1029 # of the match and see if it still succeeds. \B will
1030 # break (because it won't match at the end or start of a
1031 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001032
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001033 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1034 and result is not None:
1035 obj = re.compile(pattern)
1036 result = obj.search(s, result.start(0), result.end(0) + 1)
1037 if result is None:
1038 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001039
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001040 # Try the match with IGNORECASE enabled, and check that it
1041 # still succeeds.
1042 obj = re.compile(pattern, re.IGNORECASE)
1043 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001044 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001045 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001046
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001047 # Try the match with LOCALE enabled, and check that it
1048 # still succeeds.
1049 obj = re.compile(pattern, re.LOCALE)
1050 result = obj.search(s)
1051 if result is None:
1052 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001053
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001054 # Try the match with UNICODE locale enabled, and check
1055 # that it still succeeds.
1056 obj = re.compile(pattern, re.UNICODE)
1057 result = obj.search(s)
1058 if result is None:
1059 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001060
def test_main():
    # Run the unittest-based ReTests case first, then the table-driven
    # checks in run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001064
# Run the whole suite when this module is executed as a script.
if __name__ == "__main__":
    test_main()