blob: ff2c953517840be7f42c054a0e924398d13352c1 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00003from re import Scanner
Ezio Melotti46645632011-03-25 14:50:52 +02004import sys
5import string
6import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00007from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
Guido van Rossum23b22571997-07-17 22:36:14 +00009# Misc tests from Tim Peters' re.doc
10
Just van Rossum6802c6e2003-07-02 14:36:59 +000011# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020012# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000013# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000018
19 def test_weakref(self):
20 s = 'QabbbcR'
21 x = re.compile('ab+c')
22 y = proxy(x)
23 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
24
Skip Montanaro8ed06da2003-04-24 19:43:18 +000025 def test_search_star_plus(self):
26 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
27 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
28 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
29 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000030 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000031 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
32 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
33 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
34 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000035 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000036
def bump_num(self, matchobj):
    # Replacement callback: parse the whole match as a decimal integer and
    # return that value plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000040
def test_basic_re_sub(self):
    # re.sub() with string templates, callable replacements, numeric and
    # named group references, and escape sequences in the template.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted as-is; a string template has
    # its backslash escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Raw strings spell the same templates without relying on Python
    # leaving unrecognised escapes such as '\g' untouched.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(
        re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
        '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000068
Skip Montanaro2726fcd2003-04-25 14:31:54 +000069 def test_bug_449964(self):
70 # fails for group followed by other escape
71 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
72 'xx\bxx\b')
73
74 def test_bug_449000(self):
75 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000076 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n')
78 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000084
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    for x in 'x', u'x':
        for y in 'y', u'y':
            # (subject, expected value, expected result type)
            cases = (
                (u'', u'', unicode),
                ('', '', str),
                (unicode(x), y, unicode),
                (str(x), y, type(y)),
            )
            for subject, expected, expected_type in cases:
                result = re.sub(x, y, subject)
                self.assertEqual(result, expected)
                self.assertEqual(type(result), expected_type)
109
Raymond Hettinger80016c92007-12-19 18:13:31 +0000110 def test_bug_1661(self):
111 # Verify that flags do not get silently ignored with compiled patterns
112 pattern = re.compile('.')
113 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
114 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
115 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.compile, pattern, re.I)
117
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000118 def test_bug_3629(self):
119 # A regex that triggered a bug in the sre-code validator
120 re.compile("(?P<quote>)(?(quote))")
121
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # (template, expected) pairs, all substituted for 'x' in 'x'.
    octal_cases = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),

        (r'\400', '\0'),
        (r'\777', '\377'),
    ]
    for template, expected in octal_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Templates that must be rejected when the pattern has no groups.
    bad_templates = [
        r'\1', r'\8', r'\9', r'\11', r'\18', r'\1a', r'\90', r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in bad_templates:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    ten_groups_then_one = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(ten_groups_then_one, r'\118', 'xyz'), 'xz8')
    self.assertEqual(re.sub(ten_groups_then_one, r'\11a', 'xyz'), 'xza')
163
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000164 def test_qualified_re_sub(self):
165 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
166 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000167
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000168 def test_bug_114660(self):
169 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
170 'hello there')
171
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    for pattern, expected in (('x*', '-a-b-d-'), ('x+', 'ab-d')):
        self.assertEqual(re.sub(pattern, '-', 'abxd'), expected)
176
Ezio Melottief317382012-11-03 20:31:12 +0200177 def test_symbolic_groups(self):
178 re.compile('(?P<a>x)(?P=a)(?(a)y)')
179 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
180 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
181 self.assertRaises(re.error, re.compile, '(?Px)')
182 self.assertRaises(re.error, re.compile, '(?P=)')
183 self.assertRaises(re.error, re.compile, '(?P=1)')
184 self.assertRaises(re.error, re.compile, '(?P=a)')
185 self.assertRaises(re.error, re.compile, '(?P=a1)')
186 self.assertRaises(re.error, re.compile, '(?P=a.)')
187 self.assertRaises(re.error, re.compile, '(?P<)')
188 self.assertRaises(re.error, re.compile, '(?P<>)')
189 self.assertRaises(re.error, re.compile, '(?P<1>)')
190 self.assertRaises(re.error, re.compile, '(?P<a.>)')
191 self.assertRaises(re.error, re.compile, '(?())')
192 self.assertRaises(re.error, re.compile, '(?(a))')
193 self.assertRaises(re.error, re.compile, '(?(1a))')
194 self.assertRaises(re.error, re.compile, '(?(a.))')
195
def test_symbolic_refs(self):
    # Malformed or unresolvable group references in a sub() template.
    # Templates are raw strings so they do not depend on Python passing
    # the unrecognised '\g' escape through unchanged.
    for template in (r'\g<a', r'\g<', r'\g', r'\g<a a>', r'\g<>',
                     r'\g<1a1>'):
        self.assertRaises(re.error, re.sub, '(?P<a>x)', template, 'xx')
    # A well-formed but unknown group name is reported as IndexError.
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')
    # References to a group that did not take part in the match.
    for template in (r'\g<b>', r'\2'):
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', template,
                          'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000207
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000208 def test_re_subn(self):
209 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
210 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
211 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
212 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
213 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000214
def test_re_split(self):
    # split() on ":a:b::c" with capturing and non-capturing separators;
    # captured separator text (or None) appears in the result.
    subject = ":a:b::c"
    cases = (
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    )
    for pattern, expected in cases:
        self.assertEqual(re.split(pattern, subject), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000230
def test_qualified_re_split(self):
    # split() with maxsplit=2: the unsplit remainder is the last element.
    cases = (
        (":", ":a:b::c", ['', 'a', 'b::c']),
        (':', 'a:b:c:d', ['a', 'b', 'c:d']),
        ("(:)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
        ("(:*)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
    )
    for pattern, subject, expected in cases:
        self.assertEqual(re.split(pattern, subject, 2), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000238
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000239 def test_re_findall(self):
240 self.assertEqual(re.findall(":+", "abc"), [])
241 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
242 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
243 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
244 (":", ":"),
245 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000246
Skip Montanaro5ba00542003-04-25 16:00:14 +0000247 def test_bug_117612(self):
248 self.assertEqual(re.findall(r"(a|(b))", "aba"),
249 [("a", ""),("b", "b"),("a", "")])
250
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000251 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000252 self.assertEqual(re.match('a', 'a').groups(), ())
253 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
254 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
255 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
256 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000257
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000258 pat = re.compile('((a)|(b))(c)?')
259 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
260 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
261 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
262 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
263 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000264
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000265 # A single group
266 m = re.match('(a)', 'a')
267 self.assertEqual(m.group(0), 'a')
268 self.assertEqual(m.group(0), 'a')
269 self.assertEqual(m.group(1), 'a')
270 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000271
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000272 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
273 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
274 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
275 (None, 'b', None))
276 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000277
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000278 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000279 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
280 ('(', 'a'))
281 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
282 (None, 'a'))
283 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
285 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
286 ('a', 'b'))
287 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
288 (None, 'd'))
289 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
290 (None, 'd'))
291 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
292 ('a', ''))
293
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000294 # Tests for bug #1177831: exercise groups other than the first group
295 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
296 self.assertEqual(p.match('abc').groups(),
297 ('a', 'b', 'c'))
298 self.assertEqual(p.match('ad').groups(),
299 ('a', None, 'd'))
300 self.assertEqual(p.match('abd'), None)
301 self.assertEqual(p.match('ac'), None)
302
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000303
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000304 def test_re_groupref(self):
305 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
306 ('|', 'a'))
307 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
308 (None, 'a'))
309 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
311 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
312 ('a', 'a'))
313 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
314 (None, None))
315
316 def test_groupdict(self):
317 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
318 'first second').groupdict(),
319 {'first':'first', 'second':'second'})
320
321 def test_expand(self):
322 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
323 "first second")
324 .expand(r"\2 \1 \g<second> \g<first>"),
325 "second first second first")
326
327 def test_repeat_minmax(self):
328 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
329 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
330 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
331 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
332
333 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
334 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
335 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
336 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
341
342 self.assertEqual(re.match("^x{1}$", "xxx"), None)
343 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
344 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
345 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
346
347 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
348 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
349 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
350 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
355
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000356 self.assertEqual(re.match("^x{}$", "xxx"), None)
357 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
358
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000359 def test_getattr(self):
360 self.assertEqual(re.match("(a)", "a").pos, 0)
361 self.assertEqual(re.match("(a)", "a").endpos, 1)
362 self.assertEqual(re.match("(a)", "a").string, "a")
363 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
364 self.assertNotEqual(re.match("(a)", "a").re, None)
365
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, with no flags and
    # under LOCALE and UNICODE, on byte strings and unicode strings.
    boundary_text = "abcd abc bcd bx"
    interior_text = "abc bcd bc abxd"
    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(
            re.search(r"\b(b.)\b", boundary_text, flags).group(1), "bx")
        self.assertEqual(
            re.search(r"\B(b.)\B", interior_text, flags).group(1), "bx")

    # Under MULTILINE, ^ and $ match at line edges while \A and \Z stay
    # tied to the ends of the whole string.
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)

    # The same checks against unicode subjects.
    self.assertEqual(
        re.search(r"\b(b.)\b", u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(
        re.search(r"\B(b.)\B", u"abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)

    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(
            re.search(r"\d\D\w\W\s\S", "1aa! a", flags).group(0), "1aa! a")
395
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200396 def test_string_boundaries(self):
397 # See http://bugs.python.org/issue10713
398 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
399 "abc")
400 # There's a word boundary at the start of a string.
401 self.assertTrue(re.match(r"\b", "abc"))
402 # A non-empty string includes a non-boundary zero-length match.
403 self.assertTrue(re.search(r"\B", "abc"))
404 # There is no non-boundary match at the start of a string.
405 self.assertFalse(re.match(r"\B", "abc"))
406 # However, an empty string contains no word boundaries, and also no
407 # non-boundaries.
408 self.assertEqual(re.search(r"\B", ""), None)
409 # This one is questionable and different from the perlre behaviour,
410 # but describes current behavior.
411 self.assertEqual(re.search(r"\b", ""), None)
412 # A single word-character string has two boundaries, but no
413 # non-boundary gaps.
414 self.assertEqual(len(re.findall(r"\b", "a")), 2)
415 self.assertEqual(len(re.findall(r"\B", "a")), 0)
416 # If there are no words, there are no boundaries
417 self.assertEqual(len(re.findall(r"\b", " ")), 0)
418 self.assertEqual(len(re.findall(r"\b", " ")), 0)
419 # Can match around the whitespace.
420 self.assertEqual(len(re.findall(r"\B", " ")), 2)
421
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000422 def test_bigcharset(self):
423 self.assertEqual(re.match(u"([\u2222\u2223])",
424 u"\u2222").group(1), u"\u2222")
425 self.assertEqual(re.match(u"([\u2222\u2223])",
426 u"\u2222", re.UNICODE).group(1), u"\u2222")
427
Antoine Pitroub83ea142012-11-20 22:30:42 +0100428 def test_big_codesize(self):
429 # Issue #1160
430 r = re.compile('|'.join(('%d'%x for x in range(10000))))
431 self.assertIsNotNone(r.match('1000'))
432 self.assertIsNotNone(r.match('9999'))
433
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000434 def test_anyall(self):
435 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
436 "a\nb")
437 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
438 "a\n\nb")
439
440 def test_non_consuming(self):
441 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
442 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
443 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
444 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
445 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
446 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
447 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
448
449 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
450 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
451 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
452 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
453
454 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000455 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
456 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000457 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
458 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
459 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
460 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
461 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
462 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
463 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
464 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
465
466 def test_category(self):
467 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
468
def test_getlower(self):
    # _sre.getlower() maps 'A' to 'a' with no flags and under LOCALE and
    # UNICODE.
    import _sre
    upper, lower = ord('A'), ord('a')
    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(_sre.getlower(upper, flags), lower)

    for subject in ("ABC", u"ABC"):
        self.assertEqual(re.match("abc", subject, re.I).group(0), "ABC")
477
478 def test_not_literal(self):
479 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
480 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
481
482 def test_search_coverage(self):
483 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
484 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
485
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds with the given matched
    # text and span. With neither `match` nor `span` given, the pattern
    # must match the whole of `text`. Giving only one of the two raises
    # ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000499
def test_re_escape(self):
    # escape() on each of the first 256 code points: alphanumerics are
    # unchanged, NUL becomes '\000', everything else gets a backslash.
    alnum_chars = string.ascii_letters + string.digits
    p = u''.join(unichr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            expected = c
        elif c == u'\x00':
            expected = u'\\000'
        else:
            expected = u'\\' + c
        self.assertEqual(re.escape(c), expected)
        # The escaped form must match the original character.
        self.assertMatch(re.escape(c), c)
    self.assertMatch(re.escape(p), p)
512
def test_re_escape_byte(self):
    # Byte-string counterpart of test_re_escape, over all 256 byte values.
    alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
    p = ''.join(chr(i) for i in range(256))
    for b in p:
        if b in alnum_chars:
            expected = b
        elif b == b'\x00':
            expected = b'\\000'
        else:
            expected = b'\\' + b
        self.assertEqual(re.escape(b), expected)
        # The escaped form must match the original byte.
        self.assertMatch(re.escape(b), b)
    self.assertMatch(re.escape(p), p)
525
def test_re_escape_non_ascii(self):
    # escape() puts a backslash before each non-ASCII character, and the
    # escaped text still matches the original.
    text = u'xxx\u2620\u2620\u2620xxx'
    escaped = re.escape(text)
    self.assertEqual(escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(escaped, text)
    # An escaped character can be embedded in a larger pattern.
    embedded = u'.%s+.' % re.escape(u'\u2620')
    self.assertMatch(embedded, text,
                     u'x\u2620\u2620\u2620x', (2, 7), re.search)
533
def test_re_escape_non_ascii_bytes(self):
    # escape() on UTF-8 bytes puts a backslash before every non-ASCII
    # byte, and the escaped bytes still match the original.
    encoded = u'y\u2620y\u2620y'.encode('utf-8')
    escaped = re.escape(encoded)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, encoded)
    # The escaped three-byte sequence is found twice in the text.
    hits = re.findall(re.escape(u'\u2620'.encode('utf-8')), encoded)
    self.assertEqual(len(hits), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000541
def test_pickling(self):
    # Run the pickle round-trip check with both the pure-Python pickle
    # module and cPickle. Each module is imported immediately before it
    # is used, so a failing import does not stop the earlier check.
    import pickle
    self.pickle_test(pickle)
    import cPickle
    self.pickle_test(cPickle)
    # old pickles expect the _compile() reconstructor in sre module
    # (the import below is the check itself; the name is not used further)
    import_module("sre", deprecated=True)
    from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000550
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through `pickle` (any module that
    # provides dumps() and loads()) and check the result equals the
    # original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000556
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000557 def test_constants(self):
558 self.assertEqual(re.I, re.IGNORECASE)
559 self.assertEqual(re.L, re.LOCALE)
560 self.assertEqual(re.M, re.MULTILINE)
561 self.assertEqual(re.S, re.DOTALL)
562 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000563
def test_flags(self):
    # Compiling with each individual flag must yield a pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertNotEqual(compiled, None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000567
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000568 def test_sre_character_literals(self):
569 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
570 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
571 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
572 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
573 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
574 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
575 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
576 self.assertRaises(re.error, re.match, "\911", "")
577
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000578 def test_sre_character_class_literals(self):
579 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
580 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
581 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
582 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
583 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
584 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
585 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
586 self.assertRaises(re.error, re.match, "[\911]", "")
587
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000588 def test_bug_113254(self):
589 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
590 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
591 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
592
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000593 def test_bug_527371(self):
594 # bug described in patches 527371/672491
595 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
596 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
597 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
598 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
599 self.assertEqual(re.match("((a))", "a").lastindex, 1)
600
601 def test_bug_545855(self):
602 # bug 545855 -- This pattern failed to cause a compile error as it
603 # should, instead provoking a TypeError.
604 self.assertRaises(re.error, re.compile, 'foo[a-')
605
606 def test_bug_418626(self):
607 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
608 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
609 # pattern '*?' on a long string.
610 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
611 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
612 20003)
613 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000614 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000615 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000616 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000617
618 def test_bug_612074(self):
619 pat=u"["+re.escape(u"\u2039")+u"]"
620 self.assertEqual(re.compile(pat) and 1, 1)
621
Skip Montanaro1e703c62003-04-25 15:40:28 +0000622 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000623 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000624 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000625 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
626 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
627 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000628
629 def test_scanner(self):
630 def s_ident(scanner, token): return token
631 def s_operator(scanner, token): return "op%s" % token
632 def s_float(scanner, token): return float(token)
633 def s_int(scanner, token): return int(token)
634
635 scanner = Scanner([
636 (r"[a-zA-Z_]\w*", s_ident),
637 (r"\d+\.\d*", s_float),
638 (r"\d+", s_int),
639 (r"=|\+|-|\*|/", s_operator),
640 (r"\s+", None),
641 ])
642
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000643 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
644
Skip Montanaro1e703c62003-04-25 15:40:28 +0000645 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
646 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
647 'op+', 'bar'], ''))
648
Skip Montanaro5ba00542003-04-25 16:00:14 +0000649 def test_bug_448951(self):
650 # bug 448951 (similar to 429357, but with single char match)
651 # (Also test greedy matches.)
652 for op in '','?','*':
653 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
654 (None, None))
655 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
656 ('a:', 'a'))
657
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000658 def test_bug_725106(self):
659 # capturing groups in alternatives in repeats
660 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
661 ('b', 'a'))
662 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
663 ('c', 'b'))
664 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
665 ('b', None))
666 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
667 ('b', None))
668 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
669 ('b', 'a'))
670 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
671 ('c', 'b'))
672 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
673 ('b', None))
674 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
675 ('b', None))
676
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000677 def test_bug_725149(self):
678 # mark_stack_base restoring before restoring marks
679 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
680 ('a', None))
681 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
682 ('a', None, None))
683
Just van Rossum12723ba2003-07-02 20:03:04 +0000684 def test_bug_764548(self):
685 # bug 764548, re.compile() barfs on str/unicode subclasses
686 try:
687 unicode
688 except NameError:
689 return # no problem if we have no unicode
690 class my_unicode(unicode): pass
691 pat = re.compile(my_unicode("abc"))
692 self.assertEqual(pat.match("xyz"), None)
693
Skip Montanaro5ba00542003-04-25 16:00:14 +0000694 def test_finditer(self):
695 iter = re.finditer(r":+", "a:b::c:::d")
696 self.assertEqual([item.group(0) for item in iter],
697 [":", "::", ":::"])
698
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000699 def test_bug_926075(self):
700 try:
701 unicode
702 except NameError:
703 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000704 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000705 re.compile(eval("u'bug_926075'")))
706
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000707 def test_bug_931848(self):
708 try:
709 unicode
710 except NameError:
711 pass
712 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
713 self.assertEqual(re.compile(pattern).split("a.b.c"),
714 ['a','b','c'])
715
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000716 def test_bug_581080(self):
717 iter = re.finditer(r"\s", "a b")
718 self.assertEqual(iter.next().span(), (1,2))
719 self.assertRaises(StopIteration, iter.next)
720
721 scanner = re.compile(r"\s").scanner("a b")
722 self.assertEqual(scanner.search().span(), (1, 2))
723 self.assertEqual(scanner.search(), None)
724
725 def test_bug_817234(self):
726 iter = re.finditer(r".*", "asdf")
727 self.assertEqual(iter.next().span(), (0, 4))
728 self.assertEqual(iter.next().span(), (4, 4))
729 self.assertRaises(StopIteration, iter.next)
730
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000731 def test_bug_6561(self):
732 # '\d' should match characters in Unicode category 'Nd'
733 # (Number, Decimal Digit), but not those in 'Nl' (Number,
734 # Letter) or 'No' (Number, Other).
735 decimal_digits = [
736 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
737 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
738 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
739 ]
740 for x in decimal_digits:
741 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
742
743 not_decimal_digits = [
744 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
745 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
746 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
747 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
748 ]
749 for x in not_decimal_digits:
750 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
751
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000752 def test_empty_array(self):
753 # SF buf 1647541
754 import array
755 for typecode in 'cbBuhHiIlLfd':
756 a = array.array(typecode)
757 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000758 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000759
Guido van Rossumae04c332008-01-03 19:12:44 +0000760 def test_inline_flags(self):
761 # Bug #1700
762 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
763 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
764
765 p = re.compile(upper_char, re.I | re.U)
766 q = p.match(lower_char)
767 self.assertNotEqual(q, None)
768
769 p = re.compile(lower_char, re.I | re.U)
770 q = p.match(upper_char)
771 self.assertNotEqual(q, None)
772
773 p = re.compile('(?i)' + upper_char, re.U)
774 q = p.match(lower_char)
775 self.assertNotEqual(q, None)
776
777 p = re.compile('(?i)' + lower_char, re.U)
778 q = p.match(upper_char)
779 self.assertNotEqual(q, None)
780
781 p = re.compile('(?iu)' + upper_char)
782 q = p.match(lower_char)
783 self.assertNotEqual(q, None)
784
785 p = re.compile('(?iu)' + lower_char)
786 q = p.match(upper_char)
787 self.assertNotEqual(q, None)
788
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000789 def test_dollar_matches_twice(self):
790 "$ matches the end of string, and just before the terminating \n"
791 pattern = re.compile('$')
792 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
793 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
794 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
795
796 pattern = re.compile('$', re.MULTILINE)
797 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
798 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
799 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
800
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000801 def test_dealloc(self):
802 # issue 3299: check for segfault in debug build
803 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000804 # the overflow limit is different on wide and narrow builds and it
805 # depends on the definition of SRE_CODE (see sre.h).
806 # 2**128 should be big enough to overflow on both. For smaller values
807 # a RuntimeError is raised instead of OverflowError.
808 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000809 self.assertRaises(TypeError, re.finditer, "a", {})
810 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000811
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200812 def test_compile(self):
813 # Test return value when given string and pattern as parameter
814 pattern = re.compile('random pattern')
815 self.assertIsInstance(pattern, re._pattern_type)
816 same_pattern = re.compile(pattern)
817 self.assertIsInstance(same_pattern, re._pattern_type)
818 self.assertIs(same_pattern, pattern)
819 # Test behaviour when not given a string or pattern as parameter
820 self.assertRaises(TypeError, re.compile, 0)
821
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000822def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000823 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000824 if verbose:
825 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000826 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000827 # To save time, only run the first and last 10 tests
828 #tests = tests[:10] + tests[-10:]
829 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000830
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000831 for t in tests:
832 sys.stdout.flush()
833 pattern = s = outcome = repl = expected = None
834 if len(t) == 5:
835 pattern, s, outcome, repl, expected = t
836 elif len(t) == 3:
837 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000838 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000839 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
840
Guido van Rossum41360a41998-03-26 19:42:58 +0000841 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000842 obj = re.compile(pattern)
843 except re.error:
844 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000845 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000846 print '=== Syntax error:', t
847 except KeyboardInterrupt: raise KeyboardInterrupt
848 except:
849 print '*** Unexpected error ***', t
850 if verbose:
851 traceback.print_exc(file=sys.stdout)
852 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000853 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000854 result = obj.search(s)
855 except re.error, msg:
856 print '=== Unexpected exception', t, repr(msg)
857 if outcome == SYNTAX_ERROR:
858 # This should have been a syntax error; forget it.
859 pass
860 elif outcome == FAIL:
861 if result is None: pass # No match, as expected
862 else: print '=== Succeeded incorrectly', t
863 elif outcome == SUCCEED:
864 if result is not None:
865 # Matched, as expected, so now we compute the
866 # result string and compare it to our expected result.
867 start, end = result.span(0)
868 vardict={'found': result.group(0),
869 'groups': result.group(),
870 'flags': result.re.flags}
871 for i in range(1, 100):
872 try:
873 gi = result.group(i)
874 # Special hack because else the string concat fails:
875 if gi is None:
876 gi = "None"
877 except IndexError:
878 gi = "Error"
879 vardict['g%d' % i] = gi
880 for i in result.re.groupindex.keys():
881 try:
882 gi = result.group(i)
883 if gi is None:
884 gi = "None"
885 except IndexError:
886 gi = "Error"
887 vardict[i] = gi
888 repl = eval(repl, vardict)
889 if repl != expected:
890 print '=== grouping error', t,
891 print repr(repl) + ' should be ' + repr(expected)
892 else:
893 print '=== Failed incorrectly', t
894
895 # Try the match on a unicode string, and check that it
896 # still succeeds.
897 try:
898 result = obj.search(unicode(s, "latin-1"))
899 if result is None:
900 print '=== Fails on unicode match', t
901 except NameError:
902 continue # 1.5.2
903 except TypeError:
904 continue # unicode test case
905
906 # Try the match on a unicode pattern, and check that it
907 # still succeeds.
908 obj=re.compile(unicode(pattern, "latin-1"))
909 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000910 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000911 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000912
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000913 # Try the match with the search area limited to the extent
914 # of the match and see if it still succeeds. \B will
915 # break (because it won't match at the end or start of a
916 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000917
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000918 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
919 and result is not None:
920 obj = re.compile(pattern)
921 result = obj.search(s, result.start(0), result.end(0) + 1)
922 if result is None:
923 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000924
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000925 # Try the match with IGNORECASE enabled, and check that it
926 # still succeeds.
927 obj = re.compile(pattern, re.IGNORECASE)
928 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000929 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000930 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000931
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000932 # Try the match with LOCALE enabled, and check that it
933 # still succeeds.
934 obj = re.compile(pattern, re.LOCALE)
935 result = obj.search(s)
936 if result is None:
937 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000938
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000939 # Try the match with UNICODE locale enabled, and check
940 # that it still succeeds.
941 obj = re.compile(pattern, re.UNICODE)
942 result = obj.search(s)
943 if result is None:
944 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000945
def test_main():
    """Run the ReTests unittest cases, then the run_re_tests() checks."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000949
# Run the whole suite when this module is executed as a script.
if __name__ == '__main__':
    test_main()