blob: 6f6d014cb1c64d3693474464c5dcbea9b59aa994 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00003from re import Scanner
Ezio Melotti46645632011-03-25 14:50:52 +02004import sys
5import string
6import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00007from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
Guido van Rossum23b22571997-07-17 22:36:14 +00009# Misc tests from Tim Peters' re.doc
10
Just van Rossum6802c6e2003-07-02 14:36:59 +000011# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020012# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000013# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000018
19 def test_weakref(self):
20 s = 'QabbbcR'
21 x = re.compile('ab+c')
22 y = proxy(x)
23 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
24
Skip Montanaro8ed06da2003-04-24 19:43:18 +000025 def test_search_star_plus(self):
26 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
27 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
28 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
29 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000030 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000031 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
32 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
33 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
34 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000035 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000036
Skip Montanaro8ed06da2003-04-24 19:43:18 +000037 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000038 int_value = int(matchobj.group(0))
39 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000040
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 def test_basic_re_sub(self):
42 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
43 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
44 '9.3 -3 24x100y')
45 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
46 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000047
Skip Montanaro8ed06da2003-04-24 19:43:18 +000048 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
49 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000050
Skip Montanaro8ed06da2003-04-24 19:43:18 +000051 s = r"\1\1"
52 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
53 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
54 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000055
Skip Montanaro8ed06da2003-04-24 19:43:18 +000056 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
57 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000060
Skip Montanaro8ed06da2003-04-24 19:43:18 +000061 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
62 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
63 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
65 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000066
Skip Montanaro8ed06da2003-04-24 19:43:18 +000067 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000068
Skip Montanaro2726fcd2003-04-25 14:31:54 +000069 def test_bug_449964(self):
70 # fails for group followed by other escape
71 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
72 'xx\bxx\b')
73
74 def test_bug_449000(self):
75 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000076 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n')
78 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000084
Guido van Rossum1ff91d92007-09-10 22:02:25 +000085 def test_bug_1140(self):
86 # re.sub(x, y, u'') should return u'', not '', and
87 # re.sub(x, y, '') should return '', not u''.
88 # Also:
89 # re.sub(x, y, unicode(x)) should return unicode(y), and
90 # re.sub(x, y, str(x)) should return
91 # str(y) if isinstance(y, str) else unicode(y).
92 for x in 'x', u'x':
93 for y in 'y', u'y':
94 z = re.sub(x, y, u'')
95 self.assertEqual(z, u'')
96 self.assertEqual(type(z), unicode)
97 #
98 z = re.sub(x, y, '')
99 self.assertEqual(z, '')
100 self.assertEqual(type(z), str)
101 #
102 z = re.sub(x, y, unicode(x))
103 self.assertEqual(z, y)
104 self.assertEqual(type(z), unicode)
105 #
106 z = re.sub(x, y, str(x))
107 self.assertEqual(z, y)
108 self.assertEqual(type(z), type(y))
109
Raymond Hettinger80016c92007-12-19 18:13:31 +0000110 def test_bug_1661(self):
111 # Verify that flags do not get silently ignored with compiled patterns
112 pattern = re.compile('.')
113 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
114 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
115 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.compile, pattern, re.I)
117
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000118 def test_bug_3629(self):
119 # A regex that triggered a bug in the sre-code validator
120 re.compile("(?P<quote>)(?(quote))")
121
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000122 def test_sub_template_numeric_escape(self):
123 # bug 776311 and friends
124 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
125 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
126 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
127 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
128 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
129 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
130 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
131
132 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
133 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
134
135 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
136 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
137 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
138 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
139 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
140
141 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
142 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000143
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000144 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
145 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
153 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
155 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
156
157 # in python2.3 (etc), these loop endlessly in sre_parser.py
158 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
159 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
160 'xz8')
161 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
162 'xza')
163
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000164 def test_qualified_re_sub(self):
165 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
166 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000167
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000168 def test_bug_114660(self):
169 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
170 'hello there')
171
172 def test_bug_462270(self):
173 # Test for empty sub() behaviour, see SF bug #462270
174 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
175 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
176
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000177 def test_symbolic_refs(self):
178 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
179 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
180 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
181 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
182 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
183 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
184 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
185 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000186 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000187
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000188 def test_re_subn(self):
189 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
190 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
191 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
192 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
193 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000194
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 def test_re_split(self):
196 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
197 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
198 self.assertEqual(re.split("(:*)", ":a:b::c"),
199 ['', ':', 'a', ':', 'b', '::', 'c'])
200 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
201 self.assertEqual(re.split("(:)*", ":a:b::c"),
202 ['', ':', 'a', ':', 'b', ':', 'c'])
203 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
204 ['', ':', 'a', ':b::', 'c'])
205 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
206 ['', None, ':', 'a', None, ':', '', 'b', None, '',
207 None, '::', 'c'])
208 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
209 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000210
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000211 def test_qualified_re_split(self):
212 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
213 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
214 self.assertEqual(re.split("(:)", ":a:b::c", 2),
215 ['', ':', 'a', ':', 'b::c'])
216 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
217 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000218
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000219 def test_re_findall(self):
220 self.assertEqual(re.findall(":+", "abc"), [])
221 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
222 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
223 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
224 (":", ":"),
225 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000226
Skip Montanaro5ba00542003-04-25 16:00:14 +0000227 def test_bug_117612(self):
228 self.assertEqual(re.findall(r"(a|(b))", "aba"),
229 [("a", ""),("b", "b"),("a", "")])
230
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000231 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000232 self.assertEqual(re.match('a', 'a').groups(), ())
233 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
234 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
235 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
236 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000237
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000238 pat = re.compile('((a)|(b))(c)?')
239 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
240 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
241 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
242 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
243 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000244
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000245 # A single group
246 m = re.match('(a)', 'a')
247 self.assertEqual(m.group(0), 'a')
248 self.assertEqual(m.group(0), 'a')
249 self.assertEqual(m.group(1), 'a')
250 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000251
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000252 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
253 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
254 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
255 (None, 'b', None))
256 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000257
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000258 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000259 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
260 ('(', 'a'))
261 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
262 (None, 'a'))
263 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
264 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
265 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
266 ('a', 'b'))
267 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
268 (None, 'd'))
269 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
270 (None, 'd'))
271 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
272 ('a', ''))
273
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000274 # Tests for bug #1177831: exercise groups other than the first group
275 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
276 self.assertEqual(p.match('abc').groups(),
277 ('a', 'b', 'c'))
278 self.assertEqual(p.match('ad').groups(),
279 ('a', None, 'd'))
280 self.assertEqual(p.match('abd'), None)
281 self.assertEqual(p.match('ac'), None)
282
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000283
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000284 def test_re_groupref(self):
285 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
286 ('|', 'a'))
287 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
288 (None, 'a'))
289 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
290 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
291 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
292 ('a', 'a'))
293 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
294 (None, None))
295
296 def test_groupdict(self):
297 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
298 'first second').groupdict(),
299 {'first':'first', 'second':'second'})
300
301 def test_expand(self):
302 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
303 "first second")
304 .expand(r"\2 \1 \g<second> \g<first>"),
305 "second first second first")
306
307 def test_repeat_minmax(self):
308 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
309 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
310 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
311 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
312
313 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
314 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
315 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
316 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
317 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
318 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
319 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
320 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
321
322 self.assertEqual(re.match("^x{1}$", "xxx"), None)
323 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
324 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
325 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
326
327 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
328 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
329 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
330 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
331 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
332 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
333 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
334 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
335
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000336 self.assertEqual(re.match("^x{}$", "xxx"), None)
337 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
338
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000339 def test_getattr(self):
340 self.assertEqual(re.match("(a)", "a").pos, 0)
341 self.assertEqual(re.match("(a)", "a").endpos, 1)
342 self.assertEqual(re.match("(a)", "a").string, "a")
343 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
344 self.assertNotEqual(re.match("(a)", "a").re, None)
345
346 def test_special_escapes(self):
347 self.assertEqual(re.search(r"\b(b.)\b",
348 "abcd abc bcd bx").group(1), "bx")
349 self.assertEqual(re.search(r"\B(b.)\B",
350 "abc bcd bc abxd").group(1), "bx")
351 self.assertEqual(re.search(r"\b(b.)\b",
352 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
353 self.assertEqual(re.search(r"\B(b.)\B",
354 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
355 self.assertEqual(re.search(r"\b(b.)\b",
356 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
357 self.assertEqual(re.search(r"\B(b.)\B",
358 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
359 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
360 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
361 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
362 self.assertEqual(re.search(r"\b(b.)\b",
363 u"abcd abc bcd bx").group(1), "bx")
364 self.assertEqual(re.search(r"\B(b.)\B",
365 u"abc bcd bc abxd").group(1), "bx")
366 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
367 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
368 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
369 self.assertEqual(re.search(r"\d\D\w\W\s\S",
370 "1aa! a").group(0), "1aa! a")
371 self.assertEqual(re.search(r"\d\D\w\W\s\S",
372 "1aa! a", re.LOCALE).group(0), "1aa! a")
373 self.assertEqual(re.search(r"\d\D\w\W\s\S",
374 "1aa! a", re.UNICODE).group(0), "1aa! a")
375
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200376 def test_string_boundaries(self):
377 # See http://bugs.python.org/issue10713
378 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
379 "abc")
380 # There's a word boundary at the start of a string.
381 self.assertTrue(re.match(r"\b", "abc"))
382 # A non-empty string includes a non-boundary zero-length match.
383 self.assertTrue(re.search(r"\B", "abc"))
384 # There is no non-boundary match at the start of a string.
385 self.assertFalse(re.match(r"\B", "abc"))
386 # However, an empty string contains no word boundaries, and also no
387 # non-boundaries.
388 self.assertEqual(re.search(r"\B", ""), None)
389 # This one is questionable and different from the perlre behaviour,
390 # but describes current behavior.
391 self.assertEqual(re.search(r"\b", ""), None)
392 # A single word-character string has two boundaries, but no
393 # non-boundary gaps.
394 self.assertEqual(len(re.findall(r"\b", "a")), 2)
395 self.assertEqual(len(re.findall(r"\B", "a")), 0)
396 # If there are no words, there are no boundaries
397 self.assertEqual(len(re.findall(r"\b", " ")), 0)
398 self.assertEqual(len(re.findall(r"\b", " ")), 0)
399 # Can match around the whitespace.
400 self.assertEqual(len(re.findall(r"\B", " ")), 2)
401
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000402 def test_bigcharset(self):
403 self.assertEqual(re.match(u"([\u2222\u2223])",
404 u"\u2222").group(1), u"\u2222")
405 self.assertEqual(re.match(u"([\u2222\u2223])",
406 u"\u2222", re.UNICODE).group(1), u"\u2222")
407
408 def test_anyall(self):
409 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
410 "a\nb")
411 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
412 "a\n\nb")
413
414 def test_non_consuming(self):
415 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
416 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
417 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
418 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
419 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
420 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
421 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
422
423 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
424 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
425 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
426 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
427
428 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000429 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
430 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000431 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
432 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
433 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
434 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
435 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
436 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
437 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
438 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
439
440 def test_category(self):
441 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
442
443 def test_getlower(self):
444 import _sre
445 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
446 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
447 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
448
449 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
450 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
451
452 def test_not_literal(self):
453 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
454 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
455
456 def test_search_coverage(self):
457 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
458 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
459
Ezio Melotti46645632011-03-25 14:50:52 +0200460 def assertMatch(self, pattern, text, match=None, span=None,
461 matcher=re.match):
462 if match is None and span is None:
463 # the pattern matches the whole text
464 match = text
465 span = (0, len(text))
466 elif match is None or span is None:
467 raise ValueError('If match is not None, span should be specified '
468 '(and vice versa).')
469 m = matcher(pattern, text)
470 self.assertTrue(m)
471 self.assertEqual(m.group(), match)
472 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000473
Ezio Melotti46645632011-03-25 14:50:52 +0200474 def test_re_escape(self):
475 alnum_chars = string.ascii_letters + string.digits
476 p = u''.join(unichr(i) for i in range(256))
477 for c in p:
478 if c in alnum_chars:
479 self.assertEqual(re.escape(c), c)
480 elif c == u'\x00':
481 self.assertEqual(re.escape(c), u'\\000')
482 else:
483 self.assertEqual(re.escape(c), u'\\' + c)
484 self.assertMatch(re.escape(c), c)
485 self.assertMatch(re.escape(p), p)
486
487 def test_re_escape_byte(self):
488 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
489 p = ''.join(chr(i) for i in range(256))
490 for b in p:
491 if b in alnum_chars:
492 self.assertEqual(re.escape(b), b)
493 elif b == b'\x00':
494 self.assertEqual(re.escape(b), b'\\000')
495 else:
496 self.assertEqual(re.escape(b), b'\\' + b)
497 self.assertMatch(re.escape(b), b)
498 self.assertMatch(re.escape(p), p)
499
500 def test_re_escape_non_ascii(self):
501 s = u'xxx\u2620\u2620\u2620xxx'
502 s_escaped = re.escape(s)
503 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
504 self.assertMatch(s_escaped, s)
505 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
506 u'x\u2620\u2620\u2620x', (2, 7), re.search)
507
508 def test_re_escape_non_ascii_bytes(self):
509 b = u'y\u2620y\u2620y'.encode('utf-8')
510 b_escaped = re.escape(b)
511 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
512 self.assertMatch(b_escaped, b)
513 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
514 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000515
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000516 def test_pickling(self):
517 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000518 self.pickle_test(pickle)
519 import cPickle
520 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000521 # old pickles expect the _compile() reconstructor in sre module
Florent Xicluna6257a7b2010-03-31 22:01:03 +0000522 import_module("sre", deprecated=True)
523 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000524
525 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000526 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
527 s = pickle.dumps(oldpat)
528 newpat = pickle.loads(s)
529 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000530
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000531 def test_constants(self):
532 self.assertEqual(re.I, re.IGNORECASE)
533 self.assertEqual(re.L, re.LOCALE)
534 self.assertEqual(re.M, re.MULTILINE)
535 self.assertEqual(re.S, re.DOTALL)
536 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000537
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000538 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000539 for flag in [re.I, re.M, re.X, re.S, re.L]:
540 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000541
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000542 def test_sre_character_literals(self):
543 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
544 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
545 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
546 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
547 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
548 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
549 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
550 self.assertRaises(re.error, re.match, "\911", "")
551
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000552 def test_sre_character_class_literals(self):
553 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
554 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
555 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
556 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
557 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
558 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
559 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
560 self.assertRaises(re.error, re.match, "[\911]", "")
561
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000562 def test_bug_113254(self):
563 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
564 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
565 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
566
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000567 def test_bug_527371(self):
568 # bug described in patches 527371/672491
569 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
570 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
571 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
572 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
573 self.assertEqual(re.match("((a))", "a").lastindex, 1)
574
575 def test_bug_545855(self):
576 # bug 545855 -- This pattern failed to cause a compile error as it
577 # should, instead provoking a TypeError.
578 self.assertRaises(re.error, re.compile, 'foo[a-')
579
580 def test_bug_418626(self):
581 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
582 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
583 # pattern '*?' on a long string.
584 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
585 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
586 20003)
587 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000588 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000589 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000590 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000591
592 def test_bug_612074(self):
593 pat=u"["+re.escape(u"\u2039")+u"]"
594 self.assertEqual(re.compile(pat) and 1, 1)
595
Skip Montanaro1e703c62003-04-25 15:40:28 +0000596 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000597 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000598 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000599 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
600 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
601 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000602
603 def test_scanner(self):
604 def s_ident(scanner, token): return token
605 def s_operator(scanner, token): return "op%s" % token
606 def s_float(scanner, token): return float(token)
607 def s_int(scanner, token): return int(token)
608
609 scanner = Scanner([
610 (r"[a-zA-Z_]\w*", s_ident),
611 (r"\d+\.\d*", s_float),
612 (r"\d+", s_int),
613 (r"=|\+|-|\*|/", s_operator),
614 (r"\s+", None),
615 ])
616
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000617 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
618
Skip Montanaro1e703c62003-04-25 15:40:28 +0000619 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
620 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
621 'op+', 'bar'], ''))
622
Skip Montanaro5ba00542003-04-25 16:00:14 +0000623 def test_bug_448951(self):
624 # bug 448951 (similar to 429357, but with single char match)
625 # (Also test greedy matches.)
626 for op in '','?','*':
627 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
628 (None, None))
629 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
630 ('a:', 'a'))
631
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000632 def test_bug_725106(self):
633 # capturing groups in alternatives in repeats
634 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
635 ('b', 'a'))
636 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
637 ('c', 'b'))
638 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
639 ('b', None))
640 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
641 ('b', None))
642 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
643 ('b', 'a'))
644 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
645 ('c', 'b'))
646 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
647 ('b', None))
648 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
649 ('b', None))
650
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000651 def test_bug_725149(self):
652 # mark_stack_base restoring before restoring marks
653 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
654 ('a', None))
655 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
656 ('a', None, None))
657
Just van Rossum12723ba2003-07-02 20:03:04 +0000658 def test_bug_764548(self):
659 # bug 764548, re.compile() barfs on str/unicode subclasses
660 try:
661 unicode
662 except NameError:
663 return # no problem if we have no unicode
664 class my_unicode(unicode): pass
665 pat = re.compile(my_unicode("abc"))
666 self.assertEqual(pat.match("xyz"), None)
667
Skip Montanaro5ba00542003-04-25 16:00:14 +0000668 def test_finditer(self):
669 iter = re.finditer(r":+", "a:b::c:::d")
670 self.assertEqual([item.group(0) for item in iter],
671 [":", "::", ":::"])
672
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000673 def test_bug_926075(self):
674 try:
675 unicode
676 except NameError:
677 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000678 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000679 re.compile(eval("u'bug_926075'")))
680
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000681 def test_bug_931848(self):
682 try:
683 unicode
684 except NameError:
685 pass
686 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
687 self.assertEqual(re.compile(pattern).split("a.b.c"),
688 ['a','b','c'])
689
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000690 def test_bug_581080(self):
691 iter = re.finditer(r"\s", "a b")
692 self.assertEqual(iter.next().span(), (1,2))
693 self.assertRaises(StopIteration, iter.next)
694
695 scanner = re.compile(r"\s").scanner("a b")
696 self.assertEqual(scanner.search().span(), (1, 2))
697 self.assertEqual(scanner.search(), None)
698
699 def test_bug_817234(self):
700 iter = re.finditer(r".*", "asdf")
701 self.assertEqual(iter.next().span(), (0, 4))
702 self.assertEqual(iter.next().span(), (4, 4))
703 self.assertRaises(StopIteration, iter.next)
704
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000705 def test_bug_6561(self):
706 # '\d' should match characters in Unicode category 'Nd'
707 # (Number, Decimal Digit), but not those in 'Nl' (Number,
708 # Letter) or 'No' (Number, Other).
709 decimal_digits = [
710 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
711 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
712 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
713 ]
714 for x in decimal_digits:
715 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
716
717 not_decimal_digits = [
718 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
719 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
720 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
721 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
722 ]
723 for x in not_decimal_digits:
724 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
725
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000726 def test_empty_array(self):
727 # SF buf 1647541
728 import array
729 for typecode in 'cbBuhHiIlLfd':
730 a = array.array(typecode)
731 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000732 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000733
Guido van Rossumae04c332008-01-03 19:12:44 +0000734 def test_inline_flags(self):
735 # Bug #1700
736 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
737 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
738
739 p = re.compile(upper_char, re.I | re.U)
740 q = p.match(lower_char)
741 self.assertNotEqual(q, None)
742
743 p = re.compile(lower_char, re.I | re.U)
744 q = p.match(upper_char)
745 self.assertNotEqual(q, None)
746
747 p = re.compile('(?i)' + upper_char, re.U)
748 q = p.match(lower_char)
749 self.assertNotEqual(q, None)
750
751 p = re.compile('(?i)' + lower_char, re.U)
752 q = p.match(upper_char)
753 self.assertNotEqual(q, None)
754
755 p = re.compile('(?iu)' + upper_char)
756 q = p.match(lower_char)
757 self.assertNotEqual(q, None)
758
759 p = re.compile('(?iu)' + lower_char)
760 q = p.match(upper_char)
761 self.assertNotEqual(q, None)
762
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000763 def test_dollar_matches_twice(self):
764 "$ matches the end of string, and just before the terminating \n"
765 pattern = re.compile('$')
766 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
767 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
768 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
769
770 pattern = re.compile('$', re.MULTILINE)
771 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
772 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
773 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
774
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000775 def test_dealloc(self):
776 # issue 3299: check for segfault in debug build
777 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000778 # the overflow limit is different on wide and narrow builds and it
779 # depends on the definition of SRE_CODE (see sre.h).
780 # 2**128 should be big enough to overflow on both. For smaller values
781 # a RuntimeError is raised instead of OverflowError.
782 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000783 self.assertRaises(TypeError, re.finditer, "a", {})
784 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000785
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200786 def test_compile(self):
787 # Test return value when given string and pattern as parameter
788 pattern = re.compile('random pattern')
789 self.assertIsInstance(pattern, re._pattern_type)
790 same_pattern = re.compile(pattern)
791 self.assertIsInstance(same_pattern, re._pattern_type)
792 self.assertIs(same_pattern, pattern)
793 # Test behaviour when not given a string or pattern as parameter
794 self.assertRaises(TypeError, re.compile, 0)
795
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000796def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000797 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000798 if verbose:
799 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000800 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000801 # To save time, only run the first and last 10 tests
802 #tests = tests[:10] + tests[-10:]
803 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000804
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000805 for t in tests:
806 sys.stdout.flush()
807 pattern = s = outcome = repl = expected = None
808 if len(t) == 5:
809 pattern, s, outcome, repl, expected = t
810 elif len(t) == 3:
811 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000812 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000813 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
814
Guido van Rossum41360a41998-03-26 19:42:58 +0000815 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000816 obj = re.compile(pattern)
817 except re.error:
818 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000819 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000820 print '=== Syntax error:', t
821 except KeyboardInterrupt: raise KeyboardInterrupt
822 except:
823 print '*** Unexpected error ***', t
824 if verbose:
825 traceback.print_exc(file=sys.stdout)
826 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000827 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000828 result = obj.search(s)
829 except re.error, msg:
830 print '=== Unexpected exception', t, repr(msg)
831 if outcome == SYNTAX_ERROR:
832 # This should have been a syntax error; forget it.
833 pass
834 elif outcome == FAIL:
835 if result is None: pass # No match, as expected
836 else: print '=== Succeeded incorrectly', t
837 elif outcome == SUCCEED:
838 if result is not None:
839 # Matched, as expected, so now we compute the
840 # result string and compare it to our expected result.
841 start, end = result.span(0)
842 vardict={'found': result.group(0),
843 'groups': result.group(),
844 'flags': result.re.flags}
845 for i in range(1, 100):
846 try:
847 gi = result.group(i)
848 # Special hack because else the string concat fails:
849 if gi is None:
850 gi = "None"
851 except IndexError:
852 gi = "Error"
853 vardict['g%d' % i] = gi
854 for i in result.re.groupindex.keys():
855 try:
856 gi = result.group(i)
857 if gi is None:
858 gi = "None"
859 except IndexError:
860 gi = "Error"
861 vardict[i] = gi
862 repl = eval(repl, vardict)
863 if repl != expected:
864 print '=== grouping error', t,
865 print repr(repl) + ' should be ' + repr(expected)
866 else:
867 print '=== Failed incorrectly', t
868
869 # Try the match on a unicode string, and check that it
870 # still succeeds.
871 try:
872 result = obj.search(unicode(s, "latin-1"))
873 if result is None:
874 print '=== Fails on unicode match', t
875 except NameError:
876 continue # 1.5.2
877 except TypeError:
878 continue # unicode test case
879
880 # Try the match on a unicode pattern, and check that it
881 # still succeeds.
882 obj=re.compile(unicode(pattern, "latin-1"))
883 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000884 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000885 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000886
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000887 # Try the match with the search area limited to the extent
888 # of the match and see if it still succeeds. \B will
889 # break (because it won't match at the end or start of a
890 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000891
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000892 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
893 and result is not None:
894 obj = re.compile(pattern)
895 result = obj.search(s, result.start(0), result.end(0) + 1)
896 if result is None:
897 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000898
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000899 # Try the match with IGNORECASE enabled, and check that it
900 # still succeeds.
901 obj = re.compile(pattern, re.IGNORECASE)
902 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000903 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000904 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000905
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000906 # Try the match with LOCALE enabled, and check that it
907 # still succeeds.
908 obj = re.compile(pattern, re.LOCALE)
909 result = obj.search(s)
910 if result is None:
911 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000912
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000913 # Try the match with UNICODE locale enabled, and check
914 # that it still succeeds.
915 obj = re.compile(pattern, re.UNICODE)
916 result = obj.search(s)
917 if result is None:
918 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000919
def test_main():
    # Run the unittest-based ReTests cases first, then the cases driven
    # by run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000923
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()