blob: 7ebbf05652fa117a1bb24988de18bb546a57f059 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Guido van Rossum8e0ce301997-07-11 19:34:44 +00003import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00004from re import Scanner
R David Murray60773392013-04-14 13:08:50 -04005import sre_constants
Ezio Melotti46645632011-03-25 14:50:52 +02006import sys
7import string
8import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00009from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000010
Antoine Pitrou735f36e2012-12-03 20:53:12 +010011
Guido van Rossum23b22571997-07-17 22:36:14 +000012# Misc tests from Tim Peters' re.doc
13
Just van Rossum6802c6e2003-07-02 14:36:59 +000014# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020015# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000016# cover most of the code.
17
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000019
Skip Montanaro8ed06da2003-04-24 19:43:18 +000020class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000021
22 def test_weakref(self):
23 s = 'QabbbcR'
24 x = re.compile('ab+c')
25 y = proxy(x)
26 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
27
Skip Montanaro8ed06da2003-04-24 19:43:18 +000028 def test_search_star_plus(self):
29 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
30 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
31 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
32 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000033 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000034 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
35 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
36 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
37 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000038 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000039
def bump_num(self, matchobj):
    # Substitution callback: read the whole match as a decimal integer and
    # return that integer plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000043
def test_basic_re_sub(self):
    # Basic re.sub() behaviour: string and callable replacements, the
    # optional count argument, and escape handling in templates.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted as is; a string template has
    # its backslash escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Group references by name and by number.  The templates are raw
    # strings so that '\g' is not an invalid string-literal escape.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    # Control-character escapes are translated by the template parser;
    # the other letter escapes are expected to come through unchanged.
    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000071
Skip Montanaro2726fcd2003-04-25 14:31:54 +000072 def test_bug_449964(self):
73 # fails for group followed by other escape
74 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
75 'xx\bxx\b')
76
77 def test_bug_449000(self):
78 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000079 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
85 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
86 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000087
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    for x in ('x', u'x'):
        for y in ('y', u'y'):
            # Empty subject: the result keeps the subject's own type.
            for empty in (u'', ''):
                result = re.sub(x, y, empty)
                self.assertEqual(result, empty)
                self.assertEqual(type(result), type(empty))

            # Unicode subject: the result is always unicode.
            result = re.sub(x, y, unicode(x))
            self.assertEqual(result, y)
            self.assertEqual(type(result), unicode)

            # Byte-string subject: the result takes the replacement's type.
            result = re.sub(x, y, str(x))
            self.assertEqual(result, y)
            self.assertEqual(type(result), type(y))
112
Raymond Hettinger80016c92007-12-19 18:13:31 +0000113 def test_bug_1661(self):
114 # Verify that flags do not get silently ignored with compiled patterns
115 pattern = re.compile('.')
116 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
118 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
119 self.assertRaises(ValueError, re.compile, pattern, re.I)
120
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000121 def test_bug_3629(self):
122 # A regex that triggered a bug in the sre-code validator
123 re.compile("(?P<quote>)(?(quote))")
124
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends: numeric escapes in a replacement template
    # are read either as octal character codes or as group references.
    octal_cases = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),
        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),
        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),
        # Values above 0377 are expected to be reduced to a single byte.
        (r'\400', '\0'),
        (r'\777', '\377'),
    ]
    for template, expected in octal_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # The pattern 'x' has no groups, so these templates must be rejected.
    invalid_templates = [
        r'\1', r'\8', r'\9', r'\11', r'\18', r'\1a', r'\90', r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in invalid_templates:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    eleven_groups = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(eleven_groups, r'\118', 'xyz'), 'xz8')
    self.assertEqual(re.sub(eleven_groups, r'\11a', 'xyz'), 'xza')
166
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000167 def test_qualified_re_sub(self):
168 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
169 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000170
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000171 def test_bug_114660(self):
172 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
173 'hello there')
174
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    expectations = (
        ('x*', '-a-b-d-'),
        ('x+', 'ab-d'),
    )
    for pattern, expected in expectations:
        self.assertEqual(re.sub(pattern, '-', 'abxd'), expected)
179
Ezio Melottief317382012-11-03 20:31:12 +0200180 def test_symbolic_groups(self):
181 re.compile('(?P<a>x)(?P=a)(?(a)y)')
182 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
183 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
184 self.assertRaises(re.error, re.compile, '(?Px)')
185 self.assertRaises(re.error, re.compile, '(?P=)')
186 self.assertRaises(re.error, re.compile, '(?P=1)')
187 self.assertRaises(re.error, re.compile, '(?P=a)')
188 self.assertRaises(re.error, re.compile, '(?P=a1)')
189 self.assertRaises(re.error, re.compile, '(?P=a.)')
190 self.assertRaises(re.error, re.compile, '(?P<)')
191 self.assertRaises(re.error, re.compile, '(?P<>)')
192 self.assertRaises(re.error, re.compile, '(?P<1>)')
193 self.assertRaises(re.error, re.compile, '(?P<a.>)')
194 self.assertRaises(re.error, re.compile, '(?())')
195 self.assertRaises(re.error, re.compile, '(?(a))')
196 self.assertRaises(re.error, re.compile, '(?(1a))')
197 self.assertRaises(re.error, re.compile, '(?(a.))')
198
def test_symbolic_refs(self):
    # Malformed or unusable group references in a replacement template
    # must be reported.  Templates are raw strings so that '\g' is not
    # an invalid string-literal escape; their values are unchanged.
    malformed = [r'\g<a', r'\g<', r'\g', r'\g<a a>', r'\g<>', r'\g<1a1>']
    for template in malformed:
        self.assertRaises(re.error, re.sub, '(?P<a>x)', template, 'xx')

    # A well-formed reference to a name the pattern does not define.
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')

    # References to the group in the alternative that did not match.
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\2', 'xx')

    # A negative group number.
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000210
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000211 def test_re_subn(self):
212 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
213 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
214 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
215 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
216 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000217
def test_re_split(self):
    # split() on one subject with a range of separators; capturing groups
    # in the separator add their text (or None) to the result list.
    subject = ":a:b::c"
    cases = [
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]
    for separator, expected in cases:
        self.assertEqual(re.split(separator, subject), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000233
def test_qualified_re_split(self):
    # With maxsplit=2 at most two splits are made and the remainder of
    # the subject is returned as the last element.
    cases = [
        (":", ":a:b::c", ['', 'a', 'b::c']),
        (':', 'a:b:c:d', ['a', 'b', 'c:d']),
        ("(:)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
        ("(:*)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
    ]
    for separator, subject, expected in cases:
        self.assertEqual(re.split(separator, subject, 2), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000241
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000242 def test_re_findall(self):
243 self.assertEqual(re.findall(":+", "abc"), [])
244 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
245 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
246 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
247 (":", ":"),
248 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000249
Skip Montanaro5ba00542003-04-25 16:00:14 +0000250 def test_bug_117612(self):
251 self.assertEqual(re.findall(r"(a|(b))", "aba"),
252 [("a", ""),("b", "b"),("a", "")])
253
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000254 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000255 self.assertEqual(re.match('a', 'a').groups(), ())
256 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
257 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
258 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
259 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000260
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000261 pat = re.compile('((a)|(b))(c)?')
262 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
263 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
264 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
265 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
266 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000267
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000268 # A single group
269 m = re.match('(a)', 'a')
270 self.assertEqual(m.group(0), 'a')
271 self.assertEqual(m.group(0), 'a')
272 self.assertEqual(m.group(1), 'a')
273 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000274
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000275 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
276 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
277 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
278 (None, 'b', None))
279 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000280
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000281 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000282 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
283 ('(', 'a'))
284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
285 (None, 'a'))
286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
287 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
288 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
289 ('a', 'b'))
290 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
291 (None, 'd'))
292 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
293 (None, 'd'))
294 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
295 ('a', ''))
296
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000297 # Tests for bug #1177831: exercise groups other than the first group
298 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
299 self.assertEqual(p.match('abc').groups(),
300 ('a', 'b', 'c'))
301 self.assertEqual(p.match('ad').groups(),
302 ('a', None, 'd'))
303 self.assertEqual(p.match('abd'), None)
304 self.assertEqual(p.match('ac'), None)
305
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000306
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000307 def test_re_groupref(self):
308 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
309 ('|', 'a'))
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
311 (None, 'a'))
312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
313 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
314 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
315 ('a', 'a'))
316 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
317 (None, None))
318
319 def test_groupdict(self):
320 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
321 'first second').groupdict(),
322 {'first':'first', 'second':'second'})
323
324 def test_expand(self):
325 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
326 "first second")
327 .expand(r"\2 \1 \g<second> \g<first>"),
328 "second first second first")
329
330 def test_repeat_minmax(self):
331 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
332 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
333 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
334 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
335
336 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
344
345 self.assertEqual(re.match("^x{1}$", "xxx"), None)
346 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
347 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
348 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
349
350 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
355 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
356 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
357 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
358
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000359 self.assertEqual(re.match("^x{}$", "xxx"), None)
360 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
361
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000362 def test_getattr(self):
363 self.assertEqual(re.match("(a)", "a").pos, 0)
364 self.assertEqual(re.match("(a)", "a").endpos, 1)
365 self.assertEqual(re.match("(a)", "a").string, "a")
366 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
367 self.assertNotEqual(re.match("(a)", "a").re, None)
368
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, checked on byte
    # and unicode subjects and under the LOCALE and UNICODE flags.
    subjects = [
        ("abcd abc bcd bx", "abc bcd bc abxd", "abc", "\nabc\n"),
        (u"abcd abc bcd bx", u"abc bcd bc abxd", u"abc", u"\nabc\n"),
    ]
    for words, inner, bare, wrapped in subjects:
        self.assertEqual(re.search(r"\b(b.)\b", words).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B", inner).group(1), "bx")
        # With re.M, ^ and $ match at line breaks but \A and \Z do not.
        self.assertEqual(re.search(r"^abc$", wrapped, re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", bare, re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", wrapped, re.M), None)

    for flag in (re.LOCALE, re.UNICODE):
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", flag).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", flag).group(1), "bx")

    # Category escapes, with no flag and with each flag.
    for extra in ((), (re.LOCALE,), (re.UNICODE,)):
        found = re.search(r"\d\D\w\W\s\S", "1aa! a", *extra)
        self.assertEqual(found.group(0), "1aa! a")
398
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200399 def test_string_boundaries(self):
400 # See http://bugs.python.org/issue10713
401 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
402 "abc")
403 # There's a word boundary at the start of a string.
404 self.assertTrue(re.match(r"\b", "abc"))
405 # A non-empty string includes a non-boundary zero-length match.
406 self.assertTrue(re.search(r"\B", "abc"))
407 # There is no non-boundary match at the start of a string.
408 self.assertFalse(re.match(r"\B", "abc"))
409 # However, an empty string contains no word boundaries, and also no
410 # non-boundaries.
411 self.assertEqual(re.search(r"\B", ""), None)
412 # This one is questionable and different from the perlre behaviour,
413 # but describes current behavior.
414 self.assertEqual(re.search(r"\b", ""), None)
415 # A single word-character string has two boundaries, but no
416 # non-boundary gaps.
417 self.assertEqual(len(re.findall(r"\b", "a")), 2)
418 self.assertEqual(len(re.findall(r"\B", "a")), 0)
419 # If there are no words, there are no boundaries
420 self.assertEqual(len(re.findall(r"\b", " ")), 0)
421 self.assertEqual(len(re.findall(r"\b", " ")), 0)
422 # Can match around the whitespace.
423 self.assertEqual(len(re.findall(r"\B", " ")), 2)
424
def test_bigcharset(self):
    # Character sets whose members lie above the Latin-1 range.
    small_set = u"([\u2222\u2223])"
    for extra in ((), (re.UNICODE,)):
        found = re.match(small_set, u"\u2222", *extra)
        self.assertEqual(found.group(1), u"\u2222")

    # A set built from every 255th code point from 256 up to 2**16.
    members = [unichr(code) for code in range(256, 2**16, 255)]
    r = u'[%s]' % u''.join(members)
    self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000432
Antoine Pitroub83ea142012-11-20 22:30:42 +0100433 def test_big_codesize(self):
434 # Issue #1160
435 r = re.compile('|'.join(('%d'%x for x in range(10000))))
436 self.assertIsNotNone(r.match('1000'))
437 self.assertIsNotNone(r.match('9999'))
438
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000439 def test_anyall(self):
440 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
441 "a\nb")
442 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
443 "a\n\nb")
444
445 def test_non_consuming(self):
446 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
447 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
448 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
449 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
450 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
451 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
452 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
453
454 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
455 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
456 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
457 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
458
459 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000460 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
461 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000462 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
463 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
464 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
465 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
466 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
467 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
468 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
469 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
470
471 def test_category(self):
472 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
473
def test_getlower(self):
    # _sre.getlower() maps 'A' to 'a' with no flag and with the LOCALE
    # and UNICODE flags.
    import _sre
    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(_sre.getlower(ord('A'), flags), ord('a'))

    # IGNORECASE matching on byte and unicode subjects.
    for subject in ("ABC", u"ABC"):
        self.assertEqual(re.match("abc", subject, re.I).group(0), "ABC")
482
483 def test_not_literal(self):
484 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
485 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
486
487 def test_search_coverage(self):
488 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
489 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
490
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the match has
    # the expected text and span.
    #
    # match and span must be given together.  When both are omitted the
    # pattern is expected to match the whole of text.  Supplying only one
    # of them raises ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match = text
        span = (0, len(text))

    m = matcher(pattern, text)
    self.assertTrue(m)
    self.assertEqual(m.group(), match)
    self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000504
def test_re_escape(self):
    # re.escape() on each of the first 256 code points as unicode:
    # ASCII letters and digits are left alone, NUL becomes '\000', and
    # everything else gets a backslash prefix.  Each escaped form must
    # match the character it came from.
    alnum_chars = string.ascii_letters + string.digits
    p = u''.join([unichr(code) for code in range(256)])
    for c in p:
        if c == u'\x00':
            expected = u'\\000'
        elif c in alnum_chars:
            expected = c
        else:
            expected = u'\\' + c
        self.assertEqual(re.escape(c), expected)
        self.assertMatch(re.escape(c), c)
    # The whole string escaped at once must match itself too.
    self.assertMatch(re.escape(p), p)
517
def test_re_escape_byte(self):
    # Byte-string counterpart of the unicode escape test: ASCII letters
    # and digits are left alone, NUL becomes '\000', and everything else
    # gets a backslash prefix.
    alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
    p = ''.join([chr(code) for code in range(256)])
    for b in p:
        if b == b'\x00':
            expected = b'\\000'
        elif b in alnum_chars:
            expected = b
        else:
            expected = b'\\' + b
        self.assertEqual(re.escape(b), expected)
        self.assertMatch(re.escape(b), b)
    # The whole string escaped at once must match itself too.
    self.assertMatch(re.escape(p), p)
530
def test_re_escape_non_ascii(self):
    # Non-ASCII characters in a unicode string are backslash-escaped and
    # the escaped pattern still matches the original text.
    s = u'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)

    # The escaped character can be repeated inside a larger pattern.
    pattern = u'.%s+.' % re.escape(u'\u2620')
    self.assertMatch(pattern, s, match=u'x\u2620\u2620\u2620x',
                     span=(2, 7), matcher=re.search)
538
def test_re_escape_non_ascii_bytes(self):
    # In a UTF-8 encoded byte string every non-ASCII byte is escaped
    # separately, and the escaped pattern still matches the original.
    b = u'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)

    # The encoded character occurs twice in the subject.
    encoded_char = u'\u2620'.encode('utf-8')
    res = re.findall(re.escape(encoded_char), b)
    self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000546
def test_pickling(self):
    # Round-trip a compiled pattern through both pickle implementations.
    import pickle
    import cPickle
    for pickler in (pickle, cPickle):
        self.pickle_test(pickler)
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    # The import itself is the check; the name is not used afterwards.
    from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000555
def pickle_test(self, pickle):
    # Dump and reload a compiled pattern with the given pickle module
    # and check that the reloaded pattern equals the original.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    newpat = pickle.loads(pickle.dumps(oldpat))
    self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000561
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000562 def test_constants(self):
563 self.assertEqual(re.I, re.IGNORECASE)
564 self.assertEqual(re.L, re.LOCALE)
565 self.assertEqual(re.M, re.MULTILINE)
566 self.assertEqual(re.S, re.DOTALL)
567 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000568
def test_flags(self):
    # Compiling with each single flag must produce a pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertNotEqual(compiled, None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000572
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000573 def test_sre_character_literals(self):
574 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
575 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
576 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
577 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
578 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
579 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
580 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
581 self.assertRaises(re.error, re.match, "\911", "")
582
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000583 def test_sre_character_class_literals(self):
584 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
585 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
586 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
587 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
588 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
589 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
590 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
591 self.assertRaises(re.error, re.match, "[\911]", "")
592
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000593 def test_bug_113254(self):
594 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
595 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
596 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
597
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000598 def test_bug_527371(self):
599 # bug described in patches 527371/672491
600 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
601 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
602 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
603 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
604 self.assertEqual(re.match("((a))", "a").lastindex, 1)
605
606 def test_bug_545855(self):
607 # bug 545855 -- This pattern failed to cause a compile error as it
608 # should, instead provoking a TypeError.
609 self.assertRaises(re.error, re.compile, 'foo[a-')
610
611 def test_bug_418626(self):
612 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
613 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
614 # pattern '*?' on a long string.
615 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
616 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
617 20003)
618 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000619 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000620 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000621 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000622
623 def test_bug_612074(self):
624 pat=u"["+re.escape(u"\u2039")+u"]"
625 self.assertEqual(re.compile(pat) and 1, 1)
626
Skip Montanaro1e703c62003-04-25 15:40:28 +0000627 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000628 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000629 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000630 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
631 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
632 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000633
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200634 def test_unlimited_zero_width_repeat(self):
635 # Issue #9669
636 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
637 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
638 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
639 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
640 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
641 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
642
Skip Montanaro1e703c62003-04-25 15:40:28 +0000643 def test_scanner(self):
644 def s_ident(scanner, token): return token
645 def s_operator(scanner, token): return "op%s" % token
646 def s_float(scanner, token): return float(token)
647 def s_int(scanner, token): return int(token)
648
649 scanner = Scanner([
650 (r"[a-zA-Z_]\w*", s_ident),
651 (r"\d+\.\d*", s_float),
652 (r"\d+", s_int),
653 (r"=|\+|-|\*|/", s_operator),
654 (r"\s+", None),
655 ])
656
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000657 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
658
Skip Montanaro1e703c62003-04-25 15:40:28 +0000659 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
660 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
661 'op+', 'bar'], ''))
662
Skip Montanaro5ba00542003-04-25 16:00:14 +0000663 def test_bug_448951(self):
664 # bug 448951 (similar to 429357, but with single char match)
665 # (Also test greedy matches.)
666 for op in '','?','*':
667 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
668 (None, None))
669 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
670 ('a:', 'a'))
671
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000672 def test_bug_725106(self):
673 # capturing groups in alternatives in repeats
674 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
675 ('b', 'a'))
676 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
677 ('c', 'b'))
678 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
679 ('b', None))
680 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
681 ('b', None))
682 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
683 ('b', 'a'))
684 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
685 ('c', 'b'))
686 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
687 ('b', None))
688 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
689 ('b', None))
690
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000691 def test_bug_725149(self):
692 # mark_stack_base restoring before restoring marks
693 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
694 ('a', None))
695 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
696 ('a', None, None))
697
Just van Rossum12723ba2003-07-02 20:03:04 +0000698 def test_bug_764548(self):
699 # bug 764548, re.compile() barfs on str/unicode subclasses
700 try:
701 unicode
702 except NameError:
Zachary Ware1f702212013-12-10 14:09:20 -0600703 self.skipTest('no problem if we have no unicode')
Just van Rossum12723ba2003-07-02 20:03:04 +0000704 class my_unicode(unicode): pass
705 pat = re.compile(my_unicode("abc"))
706 self.assertEqual(pat.match("xyz"), None)
707
Skip Montanaro5ba00542003-04-25 16:00:14 +0000708 def test_finditer(self):
709 iter = re.finditer(r":+", "a:b::c:::d")
710 self.assertEqual([item.group(0) for item in iter],
711 [":", "::", ":::"])
712
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000713 def test_bug_926075(self):
714 try:
715 unicode
716 except NameError:
Zachary Ware1f702212013-12-10 14:09:20 -0600717 self.skipTest('no problem if we have no unicode')
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000718 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000719 re.compile(eval("u'bug_926075'")))
720
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000721 def test_bug_931848(self):
722 try:
723 unicode
724 except NameError:
Zachary Ware1f702212013-12-10 14:09:20 -0600725 self.skipTest('no problem if we have no unicode')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000726 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
727 self.assertEqual(re.compile(pattern).split("a.b.c"),
728 ['a','b','c'])
729
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000730 def test_bug_581080(self):
731 iter = re.finditer(r"\s", "a b")
732 self.assertEqual(iter.next().span(), (1,2))
733 self.assertRaises(StopIteration, iter.next)
734
735 scanner = re.compile(r"\s").scanner("a b")
736 self.assertEqual(scanner.search().span(), (1, 2))
737 self.assertEqual(scanner.search(), None)
738
739 def test_bug_817234(self):
740 iter = re.finditer(r".*", "asdf")
741 self.assertEqual(iter.next().span(), (0, 4))
742 self.assertEqual(iter.next().span(), (4, 4))
743 self.assertRaises(StopIteration, iter.next)
744
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000745 def test_bug_6561(self):
746 # '\d' should match characters in Unicode category 'Nd'
747 # (Number, Decimal Digit), but not those in 'Nl' (Number,
748 # Letter) or 'No' (Number, Other).
749 decimal_digits = [
750 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
751 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
752 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
753 ]
754 for x in decimal_digits:
755 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
756
757 not_decimal_digits = [
758 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
759 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
760 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
761 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
762 ]
763 for x in not_decimal_digits:
764 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
765
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000766 def test_empty_array(self):
767 # SF buf 1647541
768 import array
769 for typecode in 'cbBuhHiIlLfd':
770 a = array.array(typecode)
771 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000772 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000773
Guido van Rossumae04c332008-01-03 19:12:44 +0000774 def test_inline_flags(self):
775 # Bug #1700
776 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
777 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
778
779 p = re.compile(upper_char, re.I | re.U)
780 q = p.match(lower_char)
781 self.assertNotEqual(q, None)
782
783 p = re.compile(lower_char, re.I | re.U)
784 q = p.match(upper_char)
785 self.assertNotEqual(q, None)
786
787 p = re.compile('(?i)' + upper_char, re.U)
788 q = p.match(lower_char)
789 self.assertNotEqual(q, None)
790
791 p = re.compile('(?i)' + lower_char, re.U)
792 q = p.match(upper_char)
793 self.assertNotEqual(q, None)
794
795 p = re.compile('(?iu)' + upper_char)
796 q = p.match(lower_char)
797 self.assertNotEqual(q, None)
798
799 p = re.compile('(?iu)' + lower_char)
800 q = p.match(upper_char)
801 self.assertNotEqual(q, None)
802
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000803 def test_dollar_matches_twice(self):
804 "$ matches the end of string, and just before the terminating \n"
805 pattern = re.compile('$')
806 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
807 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
808 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
809
810 pattern = re.compile('$', re.MULTILINE)
811 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
812 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
813 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
814
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000815 def test_dealloc(self):
816 # issue 3299: check for segfault in debug build
817 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000818 # the overflow limit is different on wide and narrow builds and it
819 # depends on the definition of SRE_CODE (see sre.h).
820 # 2**128 should be big enough to overflow on both. For smaller values
821 # a RuntimeError is raised instead of OverflowError.
822 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000823 self.assertRaises(TypeError, re.finditer, "a", {})
824 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000825
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200826 def test_compile(self):
827 # Test return value when given string and pattern as parameter
828 pattern = re.compile('random pattern')
829 self.assertIsInstance(pattern, re._pattern_type)
830 same_pattern = re.compile(pattern)
831 self.assertIsInstance(same_pattern, re._pattern_type)
832 self.assertIs(same_pattern, pattern)
833 # Test behaviour when not given a string or pattern as parameter
834 self.assertRaises(TypeError, re.compile, 0)
835
Ezio Melotti5c4e32b2013-01-11 08:32:01 +0200836 def test_bug_13899(self):
837 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
838 # nothing. Ditto B and Z.
839 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
840 ['A', 'B', '\b', 'C', 'Z'])
841
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100842 @precisionbigmemtest(size=_2G, memuse=1)
843 def test_large_search(self, size):
844 # Issue #10182: indices were 32-bit-truncated.
845 s = 'a' * size
846 m = re.search('$', s)
847 self.assertIsNotNone(m)
Antoine Pitrou74635c92012-12-03 21:08:43 +0100848 self.assertEqual(m.start(), size)
849 self.assertEqual(m.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100850
Antoine Pitroub83575b2012-12-02 12:52:36 +0100851 # The huge memuse is because of re.sub() using a list and a join()
852 # to create the replacement result.
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100853 @precisionbigmemtest(size=_2G, memuse=16 + 2)
854 def test_large_subn(self, size):
Antoine Pitroub83575b2012-12-02 12:52:36 +0100855 # Issue #10182: indices were 32-bit-truncated.
856 s = 'a' * size
Antoine Pitroub83575b2012-12-02 12:52:36 +0100857 r, n = re.subn('', '', s)
858 self.assertEqual(r, s)
859 self.assertEqual(n, size + 1)
860
861
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200862 def test_repeat_minmax_overflow(self):
863 # Issue #13169
864 string = "x" * 100000
865 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
866 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
867 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
868 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
869 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
870 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
871 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
872 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
873 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
874 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
875 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
876
877 @cpython_only
878 def test_repeat_minmax_overflow_maxrepeat(self):
879 try:
880 from _sre import MAXREPEAT
881 except ImportError:
882 self.skipTest('requires _sre.MAXREPEAT constant')
883 string = "x" * 100000
884 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
885 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
886 (0, 100000))
887 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
888 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
889 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
890 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
891
R David Murray60773392013-04-14 13:08:50 -0400892 def test_backref_group_name_in_exception(self):
893 # Issue 17341: Poor error message when compiling invalid regex
894 with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
895 re.compile('(?P=<foo>)')
896
897 def test_group_name_in_exception(self):
898 # Issue 17341: Poor error message when compiling invalid regex
899 with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
900 re.compile('(?P<?foo>)')
901
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300902 def test_issue17998(self):
903 for reps in '*', '+', '?', '{1}':
904 for mod in '', '?':
905 pattern = '.' + reps + mod + 'yz'
906 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
907 ['xyz'], msg=pattern)
908 pattern = pattern.encode()
909 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
910 [b'xyz'], msg=pattern)
911
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200912
Serhiy Storchaka83737c62013-08-19 23:20:07 +0300913 def test_bug_2537(self):
914 # issue 2537: empty submatches
915 for outer_op in ('{0,}', '*', '+', '{1,187}'):
916 for inner_op in ('{0,}', '*', '?'):
917 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
918 m = r.match("xyyzy")
919 self.assertEqual(m.group(0), "xyy")
920 self.assertEqual(m.group(1), "")
921 self.assertEqual(m.group(2), "y")
922
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000923def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000924 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000925 if verbose:
926 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000927 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000928 # To save time, only run the first and last 10 tests
929 #tests = tests[:10] + tests[-10:]
930 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000931
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000932 for t in tests:
933 sys.stdout.flush()
934 pattern = s = outcome = repl = expected = None
935 if len(t) == 5:
936 pattern, s, outcome, repl, expected = t
937 elif len(t) == 3:
938 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000939 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000940 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
941
Guido van Rossum41360a41998-03-26 19:42:58 +0000942 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000943 obj = re.compile(pattern)
944 except re.error:
945 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000946 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000947 print '=== Syntax error:', t
948 except KeyboardInterrupt: raise KeyboardInterrupt
949 except:
950 print '*** Unexpected error ***', t
951 if verbose:
952 traceback.print_exc(file=sys.stdout)
953 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000954 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000955 result = obj.search(s)
956 except re.error, msg:
957 print '=== Unexpected exception', t, repr(msg)
958 if outcome == SYNTAX_ERROR:
959 # This should have been a syntax error; forget it.
960 pass
961 elif outcome == FAIL:
962 if result is None: pass # No match, as expected
963 else: print '=== Succeeded incorrectly', t
964 elif outcome == SUCCEED:
965 if result is not None:
966 # Matched, as expected, so now we compute the
967 # result string and compare it to our expected result.
968 start, end = result.span(0)
969 vardict={'found': result.group(0),
970 'groups': result.group(),
971 'flags': result.re.flags}
972 for i in range(1, 100):
973 try:
974 gi = result.group(i)
975 # Special hack because else the string concat fails:
976 if gi is None:
977 gi = "None"
978 except IndexError:
979 gi = "Error"
980 vardict['g%d' % i] = gi
981 for i in result.re.groupindex.keys():
982 try:
983 gi = result.group(i)
984 if gi is None:
985 gi = "None"
986 except IndexError:
987 gi = "Error"
988 vardict[i] = gi
989 repl = eval(repl, vardict)
990 if repl != expected:
991 print '=== grouping error', t,
992 print repr(repl) + ' should be ' + repr(expected)
993 else:
994 print '=== Failed incorrectly', t
995
996 # Try the match on a unicode string, and check that it
997 # still succeeds.
998 try:
999 result = obj.search(unicode(s, "latin-1"))
1000 if result is None:
1001 print '=== Fails on unicode match', t
1002 except NameError:
1003 continue # 1.5.2
1004 except TypeError:
1005 continue # unicode test case
1006
1007 # Try the match on a unicode pattern, and check that it
1008 # still succeeds.
1009 obj=re.compile(unicode(pattern, "latin-1"))
1010 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +00001011 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001012 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001013
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001014 # Try the match with the search area limited to the extent
1015 # of the match and see if it still succeeds. \B will
1016 # break (because it won't match at the end or start of a
1017 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001018
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001019 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1020 and result is not None:
1021 obj = re.compile(pattern)
1022 result = obj.search(s, result.start(0), result.end(0) + 1)
1023 if result is None:
1024 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001025
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001026 # Try the match with IGNORECASE enabled, and check that it
1027 # still succeeds.
1028 obj = re.compile(pattern, re.IGNORECASE)
1029 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001030 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001031 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001032
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001033 # Try the match with LOCALE enabled, and check that it
1034 # still succeeds.
1035 obj = re.compile(pattern, re.LOCALE)
1036 result = obj.search(s)
1037 if result is None:
1038 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001039
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001040 # Try the match with UNICODE locale enabled, and check
1041 # that it still succeeds.
1042 obj = re.compile(pattern, re.UNICODE)
1043 result = obj.search(s)
1044 if result is None:
1045 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001046
def test_main():
    # Entry point: run the unittest-based ReTests class first, then the
    # table-driven checks in run_re_tests().
    run_unittest(ReTests)
    run_re_tests()

# Allow the file to be run directly as a script.
if __name__ == "__main__":
    test_main()