blob: 6ddddda9e1bd2eb0161a7edb287c49544cc90756 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Guido van Rossum8e0ce301997-07-11 19:34:44 +00003import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00004from re import Scanner
Ezio Melotti46645632011-03-25 14:50:52 +02005import sys
6import string
7import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Antoine Pitrou735f36e2012-12-03 20:53:12 +010010
Guido van Rossum23b22571997-07-17 22:36:14 +000011# Misc tests from Tim Peters' re.doc
12
Just van Rossum6802c6e2003-07-02 14:36:59 +000013# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020014# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000015# cover most of the code.
16
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000018
Skip Montanaro8ed06da2003-04-24 19:43:18 +000019class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000020
21 def test_weakref(self):
22 s = 'QabbbcR'
23 x = re.compile('ab+c')
24 y = proxy(x)
25 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
26
Skip Montanaro8ed06da2003-04-24 19:43:18 +000027 def test_search_star_plus(self):
28 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
29 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
30 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
31 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000032 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000033 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
34 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
35 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
36 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000037 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000038
Skip Montanaro8ed06da2003-04-24 19:43:18 +000039 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000040 int_value = int(matchobj.group(0))
41 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000042
Skip Montanaro8ed06da2003-04-24 19:43:18 +000043 def test_basic_re_sub(self):
44 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
45 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
46 '9.3 -3 24x100y')
47 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
48 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000049
Skip Montanaro8ed06da2003-04-24 19:43:18 +000050 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
51 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000052
Skip Montanaro8ed06da2003-04-24 19:43:18 +000053 s = r"\1\1"
54 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
55 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
56 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000057
Skip Montanaro8ed06da2003-04-24 19:43:18 +000058 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
61 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000062
Skip Montanaro8ed06da2003-04-24 19:43:18 +000063 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
64 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
66 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
67 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000068
Skip Montanaro8ed06da2003-04-24 19:43:18 +000069 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000070
Skip Montanaro2726fcd2003-04-25 14:31:54 +000071 def test_bug_449964(self):
72 # fails for group followed by other escape
73 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
74 'xx\bxx\b')
75
76 def test_bug_449000(self):
77 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000078 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
84 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
85 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000086
Guido van Rossum1ff91d92007-09-10 22:02:25 +000087 def test_bug_1140(self):
88 # re.sub(x, y, u'') should return u'', not '', and
89 # re.sub(x, y, '') should return '', not u''.
90 # Also:
91 # re.sub(x, y, unicode(x)) should return unicode(y), and
92 # re.sub(x, y, str(x)) should return
93 # str(y) if isinstance(y, str) else unicode(y).
94 for x in 'x', u'x':
95 for y in 'y', u'y':
96 z = re.sub(x, y, u'')
97 self.assertEqual(z, u'')
98 self.assertEqual(type(z), unicode)
99 #
100 z = re.sub(x, y, '')
101 self.assertEqual(z, '')
102 self.assertEqual(type(z), str)
103 #
104 z = re.sub(x, y, unicode(x))
105 self.assertEqual(z, y)
106 self.assertEqual(type(z), unicode)
107 #
108 z = re.sub(x, y, str(x))
109 self.assertEqual(z, y)
110 self.assertEqual(type(z), type(y))
111
Raymond Hettinger80016c92007-12-19 18:13:31 +0000112 def test_bug_1661(self):
113 # Verify that flags do not get silently ignored with compiled patterns
114 pattern = re.compile('.')
115 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
118 self.assertRaises(ValueError, re.compile, pattern, re.I)
119
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000120 def test_bug_3629(self):
121 # A regex that triggered a bug in the sre-code validator
122 re.compile("(?P<quote>)(?(quote))")
123
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000124 def test_sub_template_numeric_escape(self):
125 # bug 776311 and friends
126 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
127 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
128 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
129 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
130 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
131 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
132 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
133
134 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
135 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
136
137 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
138 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
139 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
140 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
141 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
142
143 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
144 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000145
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000146 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
153 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
155 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
156 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
157 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
158
159 # in python2.3 (etc), these loop endlessly in sre_parser.py
160 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
161 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
162 'xz8')
163 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
164 'xza')
165
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000166 def test_qualified_re_sub(self):
167 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
168 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000169
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000170 def test_bug_114660(self):
171 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
172 'hello there')
173
174 def test_bug_462270(self):
175 # Test for empty sub() behaviour, see SF bug #462270
176 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
177 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
178
Ezio Melottief317382012-11-03 20:31:12 +0200179 def test_symbolic_groups(self):
180 re.compile('(?P<a>x)(?P=a)(?(a)y)')
181 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
182 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
183 self.assertRaises(re.error, re.compile, '(?Px)')
184 self.assertRaises(re.error, re.compile, '(?P=)')
185 self.assertRaises(re.error, re.compile, '(?P=1)')
186 self.assertRaises(re.error, re.compile, '(?P=a)')
187 self.assertRaises(re.error, re.compile, '(?P=a1)')
188 self.assertRaises(re.error, re.compile, '(?P=a.)')
189 self.assertRaises(re.error, re.compile, '(?P<)')
190 self.assertRaises(re.error, re.compile, '(?P<>)')
191 self.assertRaises(re.error, re.compile, '(?P<1>)')
192 self.assertRaises(re.error, re.compile, '(?P<a.>)')
193 self.assertRaises(re.error, re.compile, '(?())')
194 self.assertRaises(re.error, re.compile, '(?(a))')
195 self.assertRaises(re.error, re.compile, '(?(1a))')
196 self.assertRaises(re.error, re.compile, '(?(a.))')
197
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000198 def test_symbolic_refs(self):
199 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
200 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
201 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melottief317382012-11-03 20:31:12 +0200203 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000204 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
205 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
206 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
207 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000208 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000209
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000210 def test_re_subn(self):
211 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
212 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
213 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
214 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
215 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000216
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000217 def test_re_split(self):
218 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
219 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
220 self.assertEqual(re.split("(:*)", ":a:b::c"),
221 ['', ':', 'a', ':', 'b', '::', 'c'])
222 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
223 self.assertEqual(re.split("(:)*", ":a:b::c"),
224 ['', ':', 'a', ':', 'b', ':', 'c'])
225 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
226 ['', ':', 'a', ':b::', 'c'])
227 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
228 ['', None, ':', 'a', None, ':', '', 'b', None, '',
229 None, '::', 'c'])
230 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
231 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000232
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000233 def test_qualified_re_split(self):
234 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
235 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
236 self.assertEqual(re.split("(:)", ":a:b::c", 2),
237 ['', ':', 'a', ':', 'b::c'])
238 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
239 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000240
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000241 def test_re_findall(self):
242 self.assertEqual(re.findall(":+", "abc"), [])
243 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
244 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
245 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
246 (":", ":"),
247 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000248
Skip Montanaro5ba00542003-04-25 16:00:14 +0000249 def test_bug_117612(self):
250 self.assertEqual(re.findall(r"(a|(b))", "aba"),
251 [("a", ""),("b", "b"),("a", "")])
252
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000253 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000254 self.assertEqual(re.match('a', 'a').groups(), ())
255 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
256 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
257 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
258 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000259
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000260 pat = re.compile('((a)|(b))(c)?')
261 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
262 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
263 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
264 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
265 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000266
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000267 # A single group
268 m = re.match('(a)', 'a')
269 self.assertEqual(m.group(0), 'a')
270 self.assertEqual(m.group(0), 'a')
271 self.assertEqual(m.group(1), 'a')
272 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000273
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000274 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
275 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
276 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
277 (None, 'b', None))
278 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000279
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000280 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000281 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
282 ('(', 'a'))
283 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
284 (None, 'a'))
285 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
287 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
288 ('a', 'b'))
289 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
290 (None, 'd'))
291 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
292 (None, 'd'))
293 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
294 ('a', ''))
295
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000296 # Tests for bug #1177831: exercise groups other than the first group
297 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
298 self.assertEqual(p.match('abc').groups(),
299 ('a', 'b', 'c'))
300 self.assertEqual(p.match('ad').groups(),
301 ('a', None, 'd'))
302 self.assertEqual(p.match('abd'), None)
303 self.assertEqual(p.match('ac'), None)
304
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000305
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000306 def test_re_groupref(self):
307 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
308 ('|', 'a'))
309 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
310 (None, 'a'))
311 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
313 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
314 ('a', 'a'))
315 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
316 (None, None))
317
318 def test_groupdict(self):
319 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
320 'first second').groupdict(),
321 {'first':'first', 'second':'second'})
322
323 def test_expand(self):
324 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
325 "first second")
326 .expand(r"\2 \1 \g<second> \g<first>"),
327 "second first second first")
328
329 def test_repeat_minmax(self):
330 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
331 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
332 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
333 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
334
335 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
336 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
343
344 self.assertEqual(re.match("^x{1}$", "xxx"), None)
345 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
346 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
347 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
348
349 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
350 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
355 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
356 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
357
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000358 self.assertEqual(re.match("^x{}$", "xxx"), None)
359 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
360
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000361 def test_getattr(self):
362 self.assertEqual(re.match("(a)", "a").pos, 0)
363 self.assertEqual(re.match("(a)", "a").endpos, 1)
364 self.assertEqual(re.match("(a)", "a").string, "a")
365 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
366 self.assertNotEqual(re.match("(a)", "a").re, None)
367
368 def test_special_escapes(self):
369 self.assertEqual(re.search(r"\b(b.)\b",
370 "abcd abc bcd bx").group(1), "bx")
371 self.assertEqual(re.search(r"\B(b.)\B",
372 "abc bcd bc abxd").group(1), "bx")
373 self.assertEqual(re.search(r"\b(b.)\b",
374 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
375 self.assertEqual(re.search(r"\B(b.)\B",
376 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
377 self.assertEqual(re.search(r"\b(b.)\b",
378 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
379 self.assertEqual(re.search(r"\B(b.)\B",
380 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
381 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
382 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
383 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
384 self.assertEqual(re.search(r"\b(b.)\b",
385 u"abcd abc bcd bx").group(1), "bx")
386 self.assertEqual(re.search(r"\B(b.)\B",
387 u"abc bcd bc abxd").group(1), "bx")
388 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
389 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
390 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
391 self.assertEqual(re.search(r"\d\D\w\W\s\S",
392 "1aa! a").group(0), "1aa! a")
393 self.assertEqual(re.search(r"\d\D\w\W\s\S",
394 "1aa! a", re.LOCALE).group(0), "1aa! a")
395 self.assertEqual(re.search(r"\d\D\w\W\s\S",
396 "1aa! a", re.UNICODE).group(0), "1aa! a")
397
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200398 def test_string_boundaries(self):
399 # See http://bugs.python.org/issue10713
400 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
401 "abc")
402 # There's a word boundary at the start of a string.
403 self.assertTrue(re.match(r"\b", "abc"))
404 # A non-empty string includes a non-boundary zero-length match.
405 self.assertTrue(re.search(r"\B", "abc"))
406 # There is no non-boundary match at the start of a string.
407 self.assertFalse(re.match(r"\B", "abc"))
408 # However, an empty string contains no word boundaries, and also no
409 # non-boundaries.
410 self.assertEqual(re.search(r"\B", ""), None)
411 # This one is questionable and different from the perlre behaviour,
412 # but describes current behavior.
413 self.assertEqual(re.search(r"\b", ""), None)
414 # A single word-character string has two boundaries, but no
415 # non-boundary gaps.
416 self.assertEqual(len(re.findall(r"\b", "a")), 2)
417 self.assertEqual(len(re.findall(r"\B", "a")), 0)
418 # If there are no words, there are no boundaries
419 self.assertEqual(len(re.findall(r"\b", " ")), 0)
420 self.assertEqual(len(re.findall(r"\b", " ")), 0)
421 # Can match around the whitespace.
422 self.assertEqual(len(re.findall(r"\B", " ")), 2)
423
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000424 def test_bigcharset(self):
425 self.assertEqual(re.match(u"([\u2222\u2223])",
426 u"\u2222").group(1), u"\u2222")
427 self.assertEqual(re.match(u"([\u2222\u2223])",
428 u"\u2222", re.UNICODE).group(1), u"\u2222")
429
Antoine Pitroub83ea142012-11-20 22:30:42 +0100430 def test_big_codesize(self):
431 # Issue #1160
432 r = re.compile('|'.join(('%d'%x for x in range(10000))))
433 self.assertIsNotNone(r.match('1000'))
434 self.assertIsNotNone(r.match('9999'))
435
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000436 def test_anyall(self):
437 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
438 "a\nb")
439 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
440 "a\n\nb")
441
442 def test_non_consuming(self):
443 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
444 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
445 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
446 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
447 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
448 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
449 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
450
451 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
452 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
453 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
454 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
455
456 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000457 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
458 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000459 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
460 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
461 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
462 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
463 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
464 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
465 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
466 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
467
468 def test_category(self):
469 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
470
471 def test_getlower(self):
472 import _sre
473 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
474 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
475 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
476
477 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
478 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
479
480 def test_not_literal(self):
481 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
482 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
483
484 def test_search_coverage(self):
485 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
486 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
487
Ezio Melotti46645632011-03-25 14:50:52 +0200488 def assertMatch(self, pattern, text, match=None, span=None,
489 matcher=re.match):
490 if match is None and span is None:
491 # the pattern matches the whole text
492 match = text
493 span = (0, len(text))
494 elif match is None or span is None:
495 raise ValueError('If match is not None, span should be specified '
496 '(and vice versa).')
497 m = matcher(pattern, text)
498 self.assertTrue(m)
499 self.assertEqual(m.group(), match)
500 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000501
Ezio Melotti46645632011-03-25 14:50:52 +0200502 def test_re_escape(self):
503 alnum_chars = string.ascii_letters + string.digits
504 p = u''.join(unichr(i) for i in range(256))
505 for c in p:
506 if c in alnum_chars:
507 self.assertEqual(re.escape(c), c)
508 elif c == u'\x00':
509 self.assertEqual(re.escape(c), u'\\000')
510 else:
511 self.assertEqual(re.escape(c), u'\\' + c)
512 self.assertMatch(re.escape(c), c)
513 self.assertMatch(re.escape(p), p)
514
515 def test_re_escape_byte(self):
516 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
517 p = ''.join(chr(i) for i in range(256))
518 for b in p:
519 if b in alnum_chars:
520 self.assertEqual(re.escape(b), b)
521 elif b == b'\x00':
522 self.assertEqual(re.escape(b), b'\\000')
523 else:
524 self.assertEqual(re.escape(b), b'\\' + b)
525 self.assertMatch(re.escape(b), b)
526 self.assertMatch(re.escape(p), p)
527
528 def test_re_escape_non_ascii(self):
529 s = u'xxx\u2620\u2620\u2620xxx'
530 s_escaped = re.escape(s)
531 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
532 self.assertMatch(s_escaped, s)
533 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
534 u'x\u2620\u2620\u2620x', (2, 7), re.search)
535
536 def test_re_escape_non_ascii_bytes(self):
537 b = u'y\u2620y\u2620y'.encode('utf-8')
538 b_escaped = re.escape(b)
539 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
540 self.assertMatch(b_escaped, b)
541 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
542 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000543
    def test_pickling(self):
        # Run the shared round-trip check (pickle_test) once with the pickle
        # module and once with cPickle.
        import pickle
        self.pickle_test(pickle)
        import cPickle
        self.pickle_test(cPickle)
        # old pickles expect the _compile() reconstructor in sre module
        import_module("sre", deprecated=True)
        # The import itself is the check: it fails if sre no longer exposes
        # _compile.  The imported name is not used any further.
        from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000552
553 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000554 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
555 s = pickle.dumps(oldpat)
556 newpat = pickle.loads(s)
557 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000558
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000559 def test_constants(self):
560 self.assertEqual(re.I, re.IGNORECASE)
561 self.assertEqual(re.L, re.LOCALE)
562 self.assertEqual(re.M, re.MULTILINE)
563 self.assertEqual(re.S, re.DOTALL)
564 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000565
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000566 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000567 for flag in [re.I, re.M, re.X, re.S, re.L]:
568 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000569
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000570 def test_sre_character_literals(self):
571 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
572 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
573 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
574 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
575 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
576 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
577 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
578 self.assertRaises(re.error, re.match, "\911", "")
579
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000580 def test_sre_character_class_literals(self):
581 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
582 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
583 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
584 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
585 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
586 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
587 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
588 self.assertRaises(re.error, re.match, "[\911]", "")
589
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000590 def test_bug_113254(self):
591 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
592 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
593 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
594
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000595 def test_bug_527371(self):
596 # bug described in patches 527371/672491
597 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
598 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
599 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
600 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
601 self.assertEqual(re.match("((a))", "a").lastindex, 1)
602
603 def test_bug_545855(self):
604 # bug 545855 -- This pattern failed to cause a compile error as it
605 # should, instead provoking a TypeError.
606 self.assertRaises(re.error, re.compile, 'foo[a-')
607
608 def test_bug_418626(self):
609 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
610 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
611 # pattern '*?' on a long string.
612 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
613 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
614 20003)
615 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000616 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000617 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000618 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000619
620 def test_bug_612074(self):
621 pat=u"["+re.escape(u"\u2039")+u"]"
622 self.assertEqual(re.compile(pat) and 1, 1)
623
Skip Montanaro1e703c62003-04-25 15:40:28 +0000624 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000625 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000626 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000627 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
628 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
629 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000630
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200631 def test_unlimited_zero_width_repeat(self):
632 # Issue #9669
633 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
634 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
635 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
636 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
637 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
638 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
639
Skip Montanaro1e703c62003-04-25 15:40:28 +0000640 def test_scanner(self):
641 def s_ident(scanner, token): return token
642 def s_operator(scanner, token): return "op%s" % token
643 def s_float(scanner, token): return float(token)
644 def s_int(scanner, token): return int(token)
645
646 scanner = Scanner([
647 (r"[a-zA-Z_]\w*", s_ident),
648 (r"\d+\.\d*", s_float),
649 (r"\d+", s_int),
650 (r"=|\+|-|\*|/", s_operator),
651 (r"\s+", None),
652 ])
653
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000654 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
655
Skip Montanaro1e703c62003-04-25 15:40:28 +0000656 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
657 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
658 'op+', 'bar'], ''))
659
Skip Montanaro5ba00542003-04-25 16:00:14 +0000660 def test_bug_448951(self):
661 # bug 448951 (similar to 429357, but with single char match)
662 # (Also test greedy matches.)
663 for op in '','?','*':
664 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
665 (None, None))
666 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
667 ('a:', 'a'))
668
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000669 def test_bug_725106(self):
670 # capturing groups in alternatives in repeats
671 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
672 ('b', 'a'))
673 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
674 ('c', 'b'))
675 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
676 ('b', None))
677 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
678 ('b', None))
679 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
680 ('b', 'a'))
681 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
682 ('c', 'b'))
683 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
684 ('b', None))
685 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
686 ('b', None))
687
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000688 def test_bug_725149(self):
689 # mark_stack_base restoring before restoring marks
690 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
691 ('a', None))
692 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
693 ('a', None, None))
694
Just van Rossum12723ba2003-07-02 20:03:04 +0000695 def test_bug_764548(self):
696 # bug 764548, re.compile() barfs on str/unicode subclasses
697 try:
698 unicode
699 except NameError:
700 return # no problem if we have no unicode
701 class my_unicode(unicode): pass
702 pat = re.compile(my_unicode("abc"))
703 self.assertEqual(pat.match("xyz"), None)
704
Skip Montanaro5ba00542003-04-25 16:00:14 +0000705 def test_finditer(self):
706 iter = re.finditer(r":+", "a:b::c:::d")
707 self.assertEqual([item.group(0) for item in iter],
708 [":", "::", ":::"])
709
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000710 def test_bug_926075(self):
711 try:
712 unicode
713 except NameError:
714 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000715 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000716 re.compile(eval("u'bug_926075'")))
717
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000718 def test_bug_931848(self):
719 try:
720 unicode
721 except NameError:
722 pass
723 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
724 self.assertEqual(re.compile(pattern).split("a.b.c"),
725 ['a','b','c'])
726
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000727 def test_bug_581080(self):
728 iter = re.finditer(r"\s", "a b")
729 self.assertEqual(iter.next().span(), (1,2))
730 self.assertRaises(StopIteration, iter.next)
731
732 scanner = re.compile(r"\s").scanner("a b")
733 self.assertEqual(scanner.search().span(), (1, 2))
734 self.assertEqual(scanner.search(), None)
735
736 def test_bug_817234(self):
737 iter = re.finditer(r".*", "asdf")
738 self.assertEqual(iter.next().span(), (0, 4))
739 self.assertEqual(iter.next().span(), (4, 4))
740 self.assertRaises(StopIteration, iter.next)
741
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000742 def test_bug_6561(self):
743 # '\d' should match characters in Unicode category 'Nd'
744 # (Number, Decimal Digit), but not those in 'Nl' (Number,
745 # Letter) or 'No' (Number, Other).
746 decimal_digits = [
747 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
748 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
749 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
750 ]
751 for x in decimal_digits:
752 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
753
754 not_decimal_digits = [
755 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
756 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
757 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
758 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
759 ]
760 for x in not_decimal_digits:
761 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
762
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000763 def test_empty_array(self):
764 # SF buf 1647541
765 import array
766 for typecode in 'cbBuhHiIlLfd':
767 a = array.array(typecode)
768 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000769 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000770
Guido van Rossumae04c332008-01-03 19:12:44 +0000771 def test_inline_flags(self):
772 # Bug #1700
773 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
774 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
775
776 p = re.compile(upper_char, re.I | re.U)
777 q = p.match(lower_char)
778 self.assertNotEqual(q, None)
779
780 p = re.compile(lower_char, re.I | re.U)
781 q = p.match(upper_char)
782 self.assertNotEqual(q, None)
783
784 p = re.compile('(?i)' + upper_char, re.U)
785 q = p.match(lower_char)
786 self.assertNotEqual(q, None)
787
788 p = re.compile('(?i)' + lower_char, re.U)
789 q = p.match(upper_char)
790 self.assertNotEqual(q, None)
791
792 p = re.compile('(?iu)' + upper_char)
793 q = p.match(lower_char)
794 self.assertNotEqual(q, None)
795
796 p = re.compile('(?iu)' + lower_char)
797 q = p.match(upper_char)
798 self.assertNotEqual(q, None)
799
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000800 def test_dollar_matches_twice(self):
801 "$ matches the end of string, and just before the terminating \n"
802 pattern = re.compile('$')
803 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
804 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
805 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
806
807 pattern = re.compile('$', re.MULTILINE)
808 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
809 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
810 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
811
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000812 def test_dealloc(self):
813 # issue 3299: check for segfault in debug build
814 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000815 # the overflow limit is different on wide and narrow builds and it
816 # depends on the definition of SRE_CODE (see sre.h).
817 # 2**128 should be big enough to overflow on both. For smaller values
818 # a RuntimeError is raised instead of OverflowError.
819 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000820 self.assertRaises(TypeError, re.finditer, "a", {})
821 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000822
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200823 def test_compile(self):
824 # Test return value when given string and pattern as parameter
825 pattern = re.compile('random pattern')
826 self.assertIsInstance(pattern, re._pattern_type)
827 same_pattern = re.compile(pattern)
828 self.assertIsInstance(same_pattern, re._pattern_type)
829 self.assertIs(same_pattern, pattern)
830 # Test behaviour when not given a string or pattern as parameter
831 self.assertRaises(TypeError, re.compile, 0)
832
Ezio Melotti5c4e32b2013-01-11 08:32:01 +0200833 def test_bug_13899(self):
834 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
835 # nothing. Ditto B and Z.
836 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
837 ['A', 'B', '\b', 'C', 'Z'])
838
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    haystack = 'a' * size
    end_match = re.search('$', haystack)
    self.assertIsNotNone(end_match)
    # '$' can only match at the very end of a string with no newline.
    self.assertEqual(end_match.start(), size)
    self.assertEqual(end_match.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100847
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    subject = 'a' * size
    replaced, count = re.subn('', '', subject)
    self.assertEqual(replaced, subject)
    # The empty pattern matches before every character and at the end.
    self.assertEqual(count, size + 1)
857
858
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200859 def test_repeat_minmax_overflow(self):
860 # Issue #13169
861 string = "x" * 100000
862 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
863 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
864 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
865 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
866 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
867 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
868 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
869 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
870 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
871 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
872 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
873
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # Repeat counts just below _sre.MAXREPEAT compile; MAXREPEAT itself
    # is rejected with OverflowError.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    subject = "x" * 100000
    largest_ok = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % largest_ok, subject))
    upper_bounded = re.match(r".{,%d}" % largest_ok, subject)
    self.assertEqual(upper_bounded.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest_ok, subject))
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
888
889
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000890def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000891 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000892 if verbose:
893 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000894 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000895 # To save time, only run the first and last 10 tests
896 #tests = tests[:10] + tests[-10:]
897 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000898
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000899 for t in tests:
900 sys.stdout.flush()
901 pattern = s = outcome = repl = expected = None
902 if len(t) == 5:
903 pattern, s, outcome, repl, expected = t
904 elif len(t) == 3:
905 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000906 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000907 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
908
Guido van Rossum41360a41998-03-26 19:42:58 +0000909 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000910 obj = re.compile(pattern)
911 except re.error:
912 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000913 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000914 print '=== Syntax error:', t
915 except KeyboardInterrupt: raise KeyboardInterrupt
916 except:
917 print '*** Unexpected error ***', t
918 if verbose:
919 traceback.print_exc(file=sys.stdout)
920 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000921 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000922 result = obj.search(s)
923 except re.error, msg:
924 print '=== Unexpected exception', t, repr(msg)
925 if outcome == SYNTAX_ERROR:
926 # This should have been a syntax error; forget it.
927 pass
928 elif outcome == FAIL:
929 if result is None: pass # No match, as expected
930 else: print '=== Succeeded incorrectly', t
931 elif outcome == SUCCEED:
932 if result is not None:
933 # Matched, as expected, so now we compute the
934 # result string and compare it to our expected result.
935 start, end = result.span(0)
936 vardict={'found': result.group(0),
937 'groups': result.group(),
938 'flags': result.re.flags}
939 for i in range(1, 100):
940 try:
941 gi = result.group(i)
942 # Special hack because else the string concat fails:
943 if gi is None:
944 gi = "None"
945 except IndexError:
946 gi = "Error"
947 vardict['g%d' % i] = gi
948 for i in result.re.groupindex.keys():
949 try:
950 gi = result.group(i)
951 if gi is None:
952 gi = "None"
953 except IndexError:
954 gi = "Error"
955 vardict[i] = gi
956 repl = eval(repl, vardict)
957 if repl != expected:
958 print '=== grouping error', t,
959 print repr(repl) + ' should be ' + repr(expected)
960 else:
961 print '=== Failed incorrectly', t
962
963 # Try the match on a unicode string, and check that it
964 # still succeeds.
965 try:
966 result = obj.search(unicode(s, "latin-1"))
967 if result is None:
968 print '=== Fails on unicode match', t
969 except NameError:
970 continue # 1.5.2
971 except TypeError:
972 continue # unicode test case
973
974 # Try the match on a unicode pattern, and check that it
975 # still succeeds.
976 obj=re.compile(unicode(pattern, "latin-1"))
977 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000978 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000979 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000980
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000981 # Try the match with the search area limited to the extent
982 # of the match and see if it still succeeds. \B will
983 # break (because it won't match at the end or start of a
984 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000985
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000986 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
987 and result is not None:
988 obj = re.compile(pattern)
989 result = obj.search(s, result.start(0), result.end(0) + 1)
990 if result is None:
991 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000992
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000993 # Try the match with IGNORECASE enabled, and check that it
994 # still succeeds.
995 obj = re.compile(pattern, re.IGNORECASE)
996 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000997 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000998 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000999
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001000 # Try the match with LOCALE enabled, and check that it
1001 # still succeeds.
1002 obj = re.compile(pattern, re.LOCALE)
1003 result = obj.search(s)
1004 if result is None:
1005 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001006
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001007 # Try the match with UNICODE locale enabled, and check
1008 # that it still succeeds.
1009 obj = re.compile(pattern, re.UNICODE)
1010 result = obj.search(s)
1011 if result is None:
1012 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001013
def test_main():
    # Run the unittest-based ReTests suite first, then the table-driven
    # cases handled by run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001017
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()