blob: 2be5f5cb2eabb0667575e397a3799b7e7c14bd30 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Guido van Rossum8e0ce301997-07-11 19:34:44 +00003import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00004from re import Scanner
R David Murray60773392013-04-14 13:08:50 -04005import sre_constants
Ezio Melotti46645632011-03-25 14:50:52 +02006import sys
7import string
8import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00009from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000010
Antoine Pitrou735f36e2012-12-03 20:53:12 +010011
# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
17
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000019
Skip Montanaro8ed06da2003-04-24 19:43:18 +000020class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000021
22 def test_weakref(self):
23 s = 'QabbbcR'
24 x = re.compile('ab+c')
25 y = proxy(x)
26 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
27
Skip Montanaro8ed06da2003-04-24 19:43:18 +000028 def test_search_star_plus(self):
29 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
30 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
31 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
32 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000033 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000034 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
35 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
36 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
37 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000038 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000039
Skip Montanaro8ed06da2003-04-24 19:43:18 +000040 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000041 int_value = int(matchobj.group(0))
42 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000043
Skip Montanaro8ed06da2003-04-24 19:43:18 +000044 def test_basic_re_sub(self):
45 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
47 '9.3 -3 24x100y')
48 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
49 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000050
Skip Montanaro8ed06da2003-04-24 19:43:18 +000051 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
52 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000053
Skip Montanaro8ed06da2003-04-24 19:43:18 +000054 s = r"\1\1"
55 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
56 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
57 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000058
Skip Montanaro8ed06da2003-04-24 19:43:18 +000059 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
61 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
62 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000063
Skip Montanaro8ed06da2003-04-24 19:43:18 +000064 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
65 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
66 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
67 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
68 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000069
Skip Montanaro8ed06da2003-04-24 19:43:18 +000070 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000071
Skip Montanaro2726fcd2003-04-25 14:31:54 +000072 def test_bug_449964(self):
73 # fails for group followed by other escape
74 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
75 'xx\bxx\b')
76
77 def test_bug_449000(self):
78 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000079 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
85 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
86 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000087
Guido van Rossum1ff91d92007-09-10 22:02:25 +000088 def test_bug_1140(self):
89 # re.sub(x, y, u'') should return u'', not '', and
90 # re.sub(x, y, '') should return '', not u''.
91 # Also:
92 # re.sub(x, y, unicode(x)) should return unicode(y), and
93 # re.sub(x, y, str(x)) should return
94 # str(y) if isinstance(y, str) else unicode(y).
95 for x in 'x', u'x':
96 for y in 'y', u'y':
97 z = re.sub(x, y, u'')
98 self.assertEqual(z, u'')
99 self.assertEqual(type(z), unicode)
100 #
101 z = re.sub(x, y, '')
102 self.assertEqual(z, '')
103 self.assertEqual(type(z), str)
104 #
105 z = re.sub(x, y, unicode(x))
106 self.assertEqual(z, y)
107 self.assertEqual(type(z), unicode)
108 #
109 z = re.sub(x, y, str(x))
110 self.assertEqual(z, y)
111 self.assertEqual(type(z), type(y))
112
Raymond Hettinger80016c92007-12-19 18:13:31 +0000113 def test_bug_1661(self):
114 # Verify that flags do not get silently ignored with compiled patterns
115 pattern = re.compile('.')
116 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
118 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
119 self.assertRaises(ValueError, re.compile, pattern, re.I)
120
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000121 def test_bug_3629(self):
122 # A regex that triggered a bug in the sre-code validator
123 re.compile("(?P<quote>)(?(quote))")
124
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000125 def test_sub_template_numeric_escape(self):
126 # bug 776311 and friends
127 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
128 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
129 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
130 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
131 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
132 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
133 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
134
135 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
136 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
137
138 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
139 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
140 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
141 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
142 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
143
144 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
145 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000146
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000147 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
153 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
155 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
156 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
157 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
158 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
159
160 # in python2.3 (etc), these loop endlessly in sre_parser.py
161 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
162 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
163 'xz8')
164 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
165 'xza')
166
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000167 def test_qualified_re_sub(self):
168 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
169 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000170
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000171 def test_bug_114660(self):
172 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
173 'hello there')
174
175 def test_bug_462270(self):
176 # Test for empty sub() behaviour, see SF bug #462270
177 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
178 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
179
Ezio Melottief317382012-11-03 20:31:12 +0200180 def test_symbolic_groups(self):
181 re.compile('(?P<a>x)(?P=a)(?(a)y)')
182 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
183 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
184 self.assertRaises(re.error, re.compile, '(?Px)')
185 self.assertRaises(re.error, re.compile, '(?P=)')
186 self.assertRaises(re.error, re.compile, '(?P=1)')
187 self.assertRaises(re.error, re.compile, '(?P=a)')
188 self.assertRaises(re.error, re.compile, '(?P=a1)')
189 self.assertRaises(re.error, re.compile, '(?P=a.)')
190 self.assertRaises(re.error, re.compile, '(?P<)')
191 self.assertRaises(re.error, re.compile, '(?P<>)')
192 self.assertRaises(re.error, re.compile, '(?P<1>)')
193 self.assertRaises(re.error, re.compile, '(?P<a.>)')
194 self.assertRaises(re.error, re.compile, '(?())')
195 self.assertRaises(re.error, re.compile, '(?(a))')
196 self.assertRaises(re.error, re.compile, '(?(1a))')
197 self.assertRaises(re.error, re.compile, '(?(a.))')
198
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000199 def test_symbolic_refs(self):
200 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
201 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
203 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melottief317382012-11-03 20:31:12 +0200204 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000205 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
206 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
207 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
208 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000209 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000210
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000211 def test_re_subn(self):
212 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
213 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
214 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
215 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
216 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000217
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000218 def test_re_split(self):
219 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
220 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
221 self.assertEqual(re.split("(:*)", ":a:b::c"),
222 ['', ':', 'a', ':', 'b', '::', 'c'])
223 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
224 self.assertEqual(re.split("(:)*", ":a:b::c"),
225 ['', ':', 'a', ':', 'b', ':', 'c'])
226 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
227 ['', ':', 'a', ':b::', 'c'])
228 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
229 ['', None, ':', 'a', None, ':', '', 'b', None, '',
230 None, '::', 'c'])
231 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
232 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000233
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000234 def test_qualified_re_split(self):
235 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
236 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
237 self.assertEqual(re.split("(:)", ":a:b::c", 2),
238 ['', ':', 'a', ':', 'b::c'])
239 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
240 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000241
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000242 def test_re_findall(self):
243 self.assertEqual(re.findall(":+", "abc"), [])
244 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
245 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
246 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
247 (":", ":"),
248 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000249
Skip Montanaro5ba00542003-04-25 16:00:14 +0000250 def test_bug_117612(self):
251 self.assertEqual(re.findall(r"(a|(b))", "aba"),
252 [("a", ""),("b", "b"),("a", "")])
253
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000254 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000255 self.assertEqual(re.match('a', 'a').groups(), ())
256 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
257 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
258 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
259 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000260
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000261 pat = re.compile('((a)|(b))(c)?')
262 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
263 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
264 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
265 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
266 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000267
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000268 # A single group
269 m = re.match('(a)', 'a')
270 self.assertEqual(m.group(0), 'a')
271 self.assertEqual(m.group(0), 'a')
272 self.assertEqual(m.group(1), 'a')
273 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000274
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000275 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
276 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
277 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
278 (None, 'b', None))
279 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000280
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000281 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000282 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
283 ('(', 'a'))
284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
285 (None, 'a'))
286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
287 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
288 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
289 ('a', 'b'))
290 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
291 (None, 'd'))
292 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
293 (None, 'd'))
294 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
295 ('a', ''))
296
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000297 # Tests for bug #1177831: exercise groups other than the first group
298 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
299 self.assertEqual(p.match('abc').groups(),
300 ('a', 'b', 'c'))
301 self.assertEqual(p.match('ad').groups(),
302 ('a', None, 'd'))
303 self.assertEqual(p.match('abd'), None)
304 self.assertEqual(p.match('ac'), None)
305
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000306
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000307 def test_re_groupref(self):
308 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
309 ('|', 'a'))
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
311 (None, 'a'))
312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
313 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
314 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
315 ('a', 'a'))
316 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
317 (None, None))
318
319 def test_groupdict(self):
320 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
321 'first second').groupdict(),
322 {'first':'first', 'second':'second'})
323
324 def test_expand(self):
325 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
326 "first second")
327 .expand(r"\2 \1 \g<second> \g<first>"),
328 "second first second first")
329
330 def test_repeat_minmax(self):
331 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
332 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
333 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
334 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
335
336 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
344
345 self.assertEqual(re.match("^x{1}$", "xxx"), None)
346 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
347 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
348 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
349
350 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
355 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
356 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
357 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
358
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000359 self.assertEqual(re.match("^x{}$", "xxx"), None)
360 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
361
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000362 def test_getattr(self):
363 self.assertEqual(re.match("(a)", "a").pos, 0)
364 self.assertEqual(re.match("(a)", "a").endpos, 1)
365 self.assertEqual(re.match("(a)", "a").string, "a")
366 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
367 self.assertNotEqual(re.match("(a)", "a").re, None)
368
369 def test_special_escapes(self):
370 self.assertEqual(re.search(r"\b(b.)\b",
371 "abcd abc bcd bx").group(1), "bx")
372 self.assertEqual(re.search(r"\B(b.)\B",
373 "abc bcd bc abxd").group(1), "bx")
374 self.assertEqual(re.search(r"\b(b.)\b",
375 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
376 self.assertEqual(re.search(r"\B(b.)\B",
377 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
378 self.assertEqual(re.search(r"\b(b.)\b",
379 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
380 self.assertEqual(re.search(r"\B(b.)\B",
381 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
382 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
383 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
384 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
385 self.assertEqual(re.search(r"\b(b.)\b",
386 u"abcd abc bcd bx").group(1), "bx")
387 self.assertEqual(re.search(r"\B(b.)\B",
388 u"abc bcd bc abxd").group(1), "bx")
389 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
390 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
391 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
392 self.assertEqual(re.search(r"\d\D\w\W\s\S",
393 "1aa! a").group(0), "1aa! a")
394 self.assertEqual(re.search(r"\d\D\w\W\s\S",
395 "1aa! a", re.LOCALE).group(0), "1aa! a")
396 self.assertEqual(re.search(r"\d\D\w\W\s\S",
397 "1aa! a", re.UNICODE).group(0), "1aa! a")
398
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200399 def test_string_boundaries(self):
400 # See http://bugs.python.org/issue10713
401 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
402 "abc")
403 # There's a word boundary at the start of a string.
404 self.assertTrue(re.match(r"\b", "abc"))
405 # A non-empty string includes a non-boundary zero-length match.
406 self.assertTrue(re.search(r"\B", "abc"))
407 # There is no non-boundary match at the start of a string.
408 self.assertFalse(re.match(r"\B", "abc"))
409 # However, an empty string contains no word boundaries, and also no
410 # non-boundaries.
411 self.assertEqual(re.search(r"\B", ""), None)
412 # This one is questionable and different from the perlre behaviour,
413 # but describes current behavior.
414 self.assertEqual(re.search(r"\b", ""), None)
415 # A single word-character string has two boundaries, but no
416 # non-boundary gaps.
417 self.assertEqual(len(re.findall(r"\b", "a")), 2)
418 self.assertEqual(len(re.findall(r"\B", "a")), 0)
419 # If there are no words, there are no boundaries
420 self.assertEqual(len(re.findall(r"\b", " ")), 0)
421 self.assertEqual(len(re.findall(r"\b", " ")), 0)
422 # Can match around the whitespace.
423 self.assertEqual(len(re.findall(r"\B", " ")), 2)
424
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000425 def test_bigcharset(self):
426 self.assertEqual(re.match(u"([\u2222\u2223])",
427 u"\u2222").group(1), u"\u2222")
428 self.assertEqual(re.match(u"([\u2222\u2223])",
429 u"\u2222", re.UNICODE).group(1), u"\u2222")
430
Antoine Pitroub83ea142012-11-20 22:30:42 +0100431 def test_big_codesize(self):
432 # Issue #1160
433 r = re.compile('|'.join(('%d'%x for x in range(10000))))
434 self.assertIsNotNone(r.match('1000'))
435 self.assertIsNotNone(r.match('9999'))
436
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000437 def test_anyall(self):
438 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
439 "a\nb")
440 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
441 "a\n\nb")
442
443 def test_non_consuming(self):
444 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
445 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
446 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
447 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
448 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
449 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
450 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
451
452 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
453 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
454 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
455 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
456
457 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000458 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
459 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000460 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
461 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
462 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
463 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
464 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
465 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
466 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
467 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
468
469 def test_category(self):
470 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
471
472 def test_getlower(self):
473 import _sre
474 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
475 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
476 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
477
478 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
479 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
480
481 def test_not_literal(self):
482 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
483 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
484
485 def test_search_coverage(self):
486 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
487 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
488
Ezio Melotti46645632011-03-25 14:50:52 +0200489 def assertMatch(self, pattern, text, match=None, span=None,
490 matcher=re.match):
491 if match is None and span is None:
492 # the pattern matches the whole text
493 match = text
494 span = (0, len(text))
495 elif match is None or span is None:
496 raise ValueError('If match is not None, span should be specified '
497 '(and vice versa).')
498 m = matcher(pattern, text)
499 self.assertTrue(m)
500 self.assertEqual(m.group(), match)
501 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000502
Ezio Melotti46645632011-03-25 14:50:52 +0200503 def test_re_escape(self):
504 alnum_chars = string.ascii_letters + string.digits
505 p = u''.join(unichr(i) for i in range(256))
506 for c in p:
507 if c in alnum_chars:
508 self.assertEqual(re.escape(c), c)
509 elif c == u'\x00':
510 self.assertEqual(re.escape(c), u'\\000')
511 else:
512 self.assertEqual(re.escape(c), u'\\' + c)
513 self.assertMatch(re.escape(c), c)
514 self.assertMatch(re.escape(p), p)
515
516 def test_re_escape_byte(self):
517 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
518 p = ''.join(chr(i) for i in range(256))
519 for b in p:
520 if b in alnum_chars:
521 self.assertEqual(re.escape(b), b)
522 elif b == b'\x00':
523 self.assertEqual(re.escape(b), b'\\000')
524 else:
525 self.assertEqual(re.escape(b), b'\\' + b)
526 self.assertMatch(re.escape(b), b)
527 self.assertMatch(re.escape(p), p)
528
529 def test_re_escape_non_ascii(self):
530 s = u'xxx\u2620\u2620\u2620xxx'
531 s_escaped = re.escape(s)
532 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
533 self.assertMatch(s_escaped, s)
534 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
535 u'x\u2620\u2620\u2620x', (2, 7), re.search)
536
537 def test_re_escape_non_ascii_bytes(self):
538 b = u'y\u2620y\u2620y'.encode('utf-8')
539 b_escaped = re.escape(b)
540 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
541 self.assertMatch(b_escaped, b)
542 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
543 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000544
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000545 def test_pickling(self):
546 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000547 self.pickle_test(pickle)
548 import cPickle
549 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000550 # old pickles expect the _compile() reconstructor in sre module
Florent Xicluna6257a7b2010-03-31 22:01:03 +0000551 import_module("sre", deprecated=True)
552 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000553
554 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000555 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
556 s = pickle.dumps(oldpat)
557 newpat = pickle.loads(s)
558 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000559
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000560 def test_constants(self):
561 self.assertEqual(re.I, re.IGNORECASE)
562 self.assertEqual(re.L, re.LOCALE)
563 self.assertEqual(re.M, re.MULTILINE)
564 self.assertEqual(re.S, re.DOTALL)
565 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000566
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000567 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000568 for flag in [re.I, re.M, re.X, re.S, re.L]:
569 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000570
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000571 def test_sre_character_literals(self):
572 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
573 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
574 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
575 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
576 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
577 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
578 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
579 self.assertRaises(re.error, re.match, "\911", "")
580
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000581 def test_sre_character_class_literals(self):
582 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
583 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
584 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
585 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
586 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
587 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
588 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
589 self.assertRaises(re.error, re.match, "[\911]", "")
590
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000591 def test_bug_113254(self):
592 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
593 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
594 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
595
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000596 def test_bug_527371(self):
597 # bug described in patches 527371/672491
598 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
599 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
600 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
601 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
602 self.assertEqual(re.match("((a))", "a").lastindex, 1)
603
604 def test_bug_545855(self):
605 # bug 545855 -- This pattern failed to cause a compile error as it
606 # should, instead provoking a TypeError.
607 self.assertRaises(re.error, re.compile, 'foo[a-')
608
609 def test_bug_418626(self):
610 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
611 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
612 # pattern '*?' on a long string.
613 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
614 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
615 20003)
616 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000617 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000618 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000619 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000620
621 def test_bug_612074(self):
622 pat=u"["+re.escape(u"\u2039")+u"]"
623 self.assertEqual(re.compile(pat) and 1, 1)
624
Skip Montanaro1e703c62003-04-25 15:40:28 +0000625 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000626 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000627 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000628 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
629 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
630 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000631
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200632 def test_unlimited_zero_width_repeat(self):
633 # Issue #9669
634 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
635 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
636 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
637 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
638 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
639 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
640
Skip Montanaro1e703c62003-04-25 15:40:28 +0000641 def test_scanner(self):
642 def s_ident(scanner, token): return token
643 def s_operator(scanner, token): return "op%s" % token
644 def s_float(scanner, token): return float(token)
645 def s_int(scanner, token): return int(token)
646
647 scanner = Scanner([
648 (r"[a-zA-Z_]\w*", s_ident),
649 (r"\d+\.\d*", s_float),
650 (r"\d+", s_int),
651 (r"=|\+|-|\*|/", s_operator),
652 (r"\s+", None),
653 ])
654
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000655 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
656
Skip Montanaro1e703c62003-04-25 15:40:28 +0000657 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
658 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
659 'op+', 'bar'], ''))
660
Skip Montanaro5ba00542003-04-25 16:00:14 +0000661 def test_bug_448951(self):
662 # bug 448951 (similar to 429357, but with single char match)
663 # (Also test greedy matches.)
664 for op in '','?','*':
665 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
666 (None, None))
667 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
668 ('a:', 'a'))
669
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000670 def test_bug_725106(self):
671 # capturing groups in alternatives in repeats
672 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
673 ('b', 'a'))
674 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
675 ('c', 'b'))
676 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
677 ('b', None))
678 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
679 ('b', None))
680 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
681 ('b', 'a'))
682 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
683 ('c', 'b'))
684 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
685 ('b', None))
686 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
687 ('b', None))
688
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000689 def test_bug_725149(self):
690 # mark_stack_base restoring before restoring marks
691 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
692 ('a', None))
693 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
694 ('a', None, None))
695
Just van Rossum12723ba2003-07-02 20:03:04 +0000696 def test_bug_764548(self):
697 # bug 764548, re.compile() barfs on str/unicode subclasses
698 try:
699 unicode
700 except NameError:
701 return # no problem if we have no unicode
702 class my_unicode(unicode): pass
703 pat = re.compile(my_unicode("abc"))
704 self.assertEqual(pat.match("xyz"), None)
705
Skip Montanaro5ba00542003-04-25 16:00:14 +0000706 def test_finditer(self):
707 iter = re.finditer(r":+", "a:b::c:::d")
708 self.assertEqual([item.group(0) for item in iter],
709 [":", "::", ":::"])
710
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000711 def test_bug_926075(self):
712 try:
713 unicode
714 except NameError:
715 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000716 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000717 re.compile(eval("u'bug_926075'")))
718
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000719 def test_bug_931848(self):
720 try:
721 unicode
722 except NameError:
723 pass
724 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
725 self.assertEqual(re.compile(pattern).split("a.b.c"),
726 ['a','b','c'])
727
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000728 def test_bug_581080(self):
729 iter = re.finditer(r"\s", "a b")
730 self.assertEqual(iter.next().span(), (1,2))
731 self.assertRaises(StopIteration, iter.next)
732
733 scanner = re.compile(r"\s").scanner("a b")
734 self.assertEqual(scanner.search().span(), (1, 2))
735 self.assertEqual(scanner.search(), None)
736
737 def test_bug_817234(self):
738 iter = re.finditer(r".*", "asdf")
739 self.assertEqual(iter.next().span(), (0, 4))
740 self.assertEqual(iter.next().span(), (4, 4))
741 self.assertRaises(StopIteration, iter.next)
742
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000743 def test_bug_6561(self):
744 # '\d' should match characters in Unicode category 'Nd'
745 # (Number, Decimal Digit), but not those in 'Nl' (Number,
746 # Letter) or 'No' (Number, Other).
747 decimal_digits = [
748 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
749 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
750 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
751 ]
752 for x in decimal_digits:
753 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
754
755 not_decimal_digits = [
756 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
757 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
758 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
759 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
760 ]
761 for x in not_decimal_digits:
762 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
763
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000764 def test_empty_array(self):
765 # SF buf 1647541
766 import array
767 for typecode in 'cbBuhHiIlLfd':
768 a = array.array(typecode)
769 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000770 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000771
Guido van Rossumae04c332008-01-03 19:12:44 +0000772 def test_inline_flags(self):
773 # Bug #1700
774 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
775 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
776
777 p = re.compile(upper_char, re.I | re.U)
778 q = p.match(lower_char)
779 self.assertNotEqual(q, None)
780
781 p = re.compile(lower_char, re.I | re.U)
782 q = p.match(upper_char)
783 self.assertNotEqual(q, None)
784
785 p = re.compile('(?i)' + upper_char, re.U)
786 q = p.match(lower_char)
787 self.assertNotEqual(q, None)
788
789 p = re.compile('(?i)' + lower_char, re.U)
790 q = p.match(upper_char)
791 self.assertNotEqual(q, None)
792
793 p = re.compile('(?iu)' + upper_char)
794 q = p.match(lower_char)
795 self.assertNotEqual(q, None)
796
797 p = re.compile('(?iu)' + lower_char)
798 q = p.match(upper_char)
799 self.assertNotEqual(q, None)
800
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000801 def test_dollar_matches_twice(self):
802 "$ matches the end of string, and just before the terminating \n"
803 pattern = re.compile('$')
804 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
805 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
806 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
807
808 pattern = re.compile('$', re.MULTILINE)
809 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
810 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
811 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
812
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000813 def test_dealloc(self):
814 # issue 3299: check for segfault in debug build
815 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000816 # the overflow limit is different on wide and narrow builds and it
817 # depends on the definition of SRE_CODE (see sre.h).
818 # 2**128 should be big enough to overflow on both. For smaller values
819 # a RuntimeError is raised instead of OverflowError.
820 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000821 self.assertRaises(TypeError, re.finditer, "a", {})
822 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000823
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200824 def test_compile(self):
825 # Test return value when given string and pattern as parameter
826 pattern = re.compile('random pattern')
827 self.assertIsInstance(pattern, re._pattern_type)
828 same_pattern = re.compile(pattern)
829 self.assertIsInstance(same_pattern, re._pattern_type)
830 self.assertIs(same_pattern, pattern)
831 # Test behaviour when not given a string or pattern as parameter
832 self.assertRaises(TypeError, re.compile, 0)
833
Ezio Melotti5c4e32b2013-01-11 08:32:01 +0200834 def test_bug_13899(self):
835 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
836 # nothing. Ditto B and Z.
837 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
838 ['A', 'B', '\b', 'C', 'Z'])
839
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100840 @precisionbigmemtest(size=_2G, memuse=1)
841 def test_large_search(self, size):
842 # Issue #10182: indices were 32-bit-truncated.
843 s = 'a' * size
844 m = re.search('$', s)
845 self.assertIsNotNone(m)
Antoine Pitrou74635c92012-12-03 21:08:43 +0100846 self.assertEqual(m.start(), size)
847 self.assertEqual(m.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100848
Antoine Pitroub83575b2012-12-02 12:52:36 +0100849 # The huge memuse is because of re.sub() using a list and a join()
850 # to create the replacement result.
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100851 @precisionbigmemtest(size=_2G, memuse=16 + 2)
852 def test_large_subn(self, size):
Antoine Pitroub83575b2012-12-02 12:52:36 +0100853 # Issue #10182: indices were 32-bit-truncated.
854 s = 'a' * size
Antoine Pitroub83575b2012-12-02 12:52:36 +0100855 r, n = re.subn('', '', s)
856 self.assertEqual(r, s)
857 self.assertEqual(n, size + 1)
858
859
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200860 def test_repeat_minmax_overflow(self):
861 # Issue #13169
862 string = "x" * 100000
863 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
864 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
865 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
866 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
867 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
868 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
869 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
870 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
871 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
872 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
873 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
874
875 @cpython_only
876 def test_repeat_minmax_overflow_maxrepeat(self):
877 try:
878 from _sre import MAXREPEAT
879 except ImportError:
880 self.skipTest('requires _sre.MAXREPEAT constant')
881 string = "x" * 100000
882 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
883 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
884 (0, 100000))
885 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
886 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
887 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
888 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
889
R David Murray60773392013-04-14 13:08:50 -0400890 def test_backref_group_name_in_exception(self):
891 # Issue 17341: Poor error message when compiling invalid regex
892 with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
893 re.compile('(?P=<foo>)')
894
895 def test_group_name_in_exception(self):
896 # Issue 17341: Poor error message when compiling invalid regex
897 with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
898 re.compile('(?P<?foo>)')
899
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300900 def test_issue17998(self):
901 for reps in '*', '+', '?', '{1}':
902 for mod in '', '?':
903 pattern = '.' + reps + mod + 'yz'
904 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
905 ['xyz'], msg=pattern)
906 pattern = pattern.encode()
907 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
908 [b'xyz'], msg=pattern)
909
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200910
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000911def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000912 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000913 if verbose:
914 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000915 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000916 # To save time, only run the first and last 10 tests
917 #tests = tests[:10] + tests[-10:]
918 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000919
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000920 for t in tests:
921 sys.stdout.flush()
922 pattern = s = outcome = repl = expected = None
923 if len(t) == 5:
924 pattern, s, outcome, repl, expected = t
925 elif len(t) == 3:
926 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000927 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000928 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
929
Guido van Rossum41360a41998-03-26 19:42:58 +0000930 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000931 obj = re.compile(pattern)
932 except re.error:
933 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000934 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000935 print '=== Syntax error:', t
936 except KeyboardInterrupt: raise KeyboardInterrupt
937 except:
938 print '*** Unexpected error ***', t
939 if verbose:
940 traceback.print_exc(file=sys.stdout)
941 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000942 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000943 result = obj.search(s)
944 except re.error, msg:
945 print '=== Unexpected exception', t, repr(msg)
946 if outcome == SYNTAX_ERROR:
947 # This should have been a syntax error; forget it.
948 pass
949 elif outcome == FAIL:
950 if result is None: pass # No match, as expected
951 else: print '=== Succeeded incorrectly', t
952 elif outcome == SUCCEED:
953 if result is not None:
954 # Matched, as expected, so now we compute the
955 # result string and compare it to our expected result.
956 start, end = result.span(0)
957 vardict={'found': result.group(0),
958 'groups': result.group(),
959 'flags': result.re.flags}
960 for i in range(1, 100):
961 try:
962 gi = result.group(i)
963 # Special hack because else the string concat fails:
964 if gi is None:
965 gi = "None"
966 except IndexError:
967 gi = "Error"
968 vardict['g%d' % i] = gi
969 for i in result.re.groupindex.keys():
970 try:
971 gi = result.group(i)
972 if gi is None:
973 gi = "None"
974 except IndexError:
975 gi = "Error"
976 vardict[i] = gi
977 repl = eval(repl, vardict)
978 if repl != expected:
979 print '=== grouping error', t,
980 print repr(repl) + ' should be ' + repr(expected)
981 else:
982 print '=== Failed incorrectly', t
983
984 # Try the match on a unicode string, and check that it
985 # still succeeds.
986 try:
987 result = obj.search(unicode(s, "latin-1"))
988 if result is None:
989 print '=== Fails on unicode match', t
990 except NameError:
991 continue # 1.5.2
992 except TypeError:
993 continue # unicode test case
994
995 # Try the match on a unicode pattern, and check that it
996 # still succeeds.
997 obj=re.compile(unicode(pattern, "latin-1"))
998 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000999 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001000 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001001
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001002 # Try the match with the search area limited to the extent
1003 # of the match and see if it still succeeds. \B will
1004 # break (because it won't match at the end or start of a
1005 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001006
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001007 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1008 and result is not None:
1009 obj = re.compile(pattern)
1010 result = obj.search(s, result.start(0), result.end(0) + 1)
1011 if result is None:
1012 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001013
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001014 # Try the match with IGNORECASE enabled, and check that it
1015 # still succeeds.
1016 obj = re.compile(pattern, re.IGNORECASE)
1017 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001018 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001019 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001020
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001021 # Try the match with LOCALE enabled, and check that it
1022 # still succeeds.
1023 obj = re.compile(pattern, re.LOCALE)
1024 result = obj.search(s)
1025 if result is None:
1026 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001027
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001028 # Try the match with UNICODE locale enabled, and check
1029 # that it still succeeds.
1030 obj = re.compile(pattern, re.UNICODE)
1031 result = obj.search(s)
1032 if result is None:
1033 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001034
def test_main():
    # Run the unittest-based ReTests cases first, then the table-driven
    # checks performed by run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001038
# Run the whole suite when this module is executed as a script.
if __name__ == "__main__":
    test_main()