blob: 70702e63483d62d07e99116f7f550d617fccc8f5 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Antoine Pitroub83575b2012-12-02 12:52:36 +01002from test.test_support import precisionbigmemtest, _2G
Guido van Rossum8e0ce301997-07-11 19:34:44 +00003import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00004from re import Scanner
Ezio Melotti46645632011-03-25 14:50:52 +02005import sys
6import string
7import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Antoine Pitrou735f36e2012-12-03 20:53:12 +010010
Guido van Rossum23b22571997-07-17 22:36:14 +000011# Misc tests from Tim Peters' re.doc
12
Just van Rossum6802c6e2003-07-02 14:36:59 +000013# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020014# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000015# cover most of the code.
16
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000018
Skip Montanaro8ed06da2003-04-24 19:43:18 +000019class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000020
21 def test_weakref(self):
22 s = 'QabbbcR'
23 x = re.compile('ab+c')
24 y = proxy(x)
25 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
26
Skip Montanaro8ed06da2003-04-24 19:43:18 +000027 def test_search_star_plus(self):
28 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
29 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
30 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
31 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000032 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000033 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
34 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
35 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
36 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000037 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000038
Skip Montanaro8ed06da2003-04-24 19:43:18 +000039 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000040 int_value = int(matchobj.group(0))
41 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000042
Skip Montanaro8ed06da2003-04-24 19:43:18 +000043 def test_basic_re_sub(self):
44 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
45 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
46 '9.3 -3 24x100y')
47 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
48 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000049
Skip Montanaro8ed06da2003-04-24 19:43:18 +000050 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
51 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000052
Skip Montanaro8ed06da2003-04-24 19:43:18 +000053 s = r"\1\1"
54 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
55 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
56 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000057
Skip Montanaro8ed06da2003-04-24 19:43:18 +000058 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
61 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000062
Skip Montanaro8ed06da2003-04-24 19:43:18 +000063 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
64 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
66 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
67 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000068
Skip Montanaro8ed06da2003-04-24 19:43:18 +000069 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000070
Skip Montanaro2726fcd2003-04-25 14:31:54 +000071 def test_bug_449964(self):
72 # fails for group followed by other escape
73 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
74 'xx\bxx\b')
75
76 def test_bug_449000(self):
77 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000078 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
84 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
85 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000086
Guido van Rossum1ff91d92007-09-10 22:02:25 +000087 def test_bug_1140(self):
88 # re.sub(x, y, u'') should return u'', not '', and
89 # re.sub(x, y, '') should return '', not u''.
90 # Also:
91 # re.sub(x, y, unicode(x)) should return unicode(y), and
92 # re.sub(x, y, str(x)) should return
93 # str(y) if isinstance(y, str) else unicode(y).
94 for x in 'x', u'x':
95 for y in 'y', u'y':
96 z = re.sub(x, y, u'')
97 self.assertEqual(z, u'')
98 self.assertEqual(type(z), unicode)
99 #
100 z = re.sub(x, y, '')
101 self.assertEqual(z, '')
102 self.assertEqual(type(z), str)
103 #
104 z = re.sub(x, y, unicode(x))
105 self.assertEqual(z, y)
106 self.assertEqual(type(z), unicode)
107 #
108 z = re.sub(x, y, str(x))
109 self.assertEqual(z, y)
110 self.assertEqual(type(z), type(y))
111
Raymond Hettinger80016c92007-12-19 18:13:31 +0000112 def test_bug_1661(self):
113 # Verify that flags do not get silently ignored with compiled patterns
114 pattern = re.compile('.')
115 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
118 self.assertRaises(ValueError, re.compile, pattern, re.I)
119
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000120 def test_bug_3629(self):
121 # A regex that triggered a bug in the sre-code validator
122 re.compile("(?P<quote>)(?(quote))")
123
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000124 def test_sub_template_numeric_escape(self):
125 # bug 776311 and friends
126 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
127 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
128 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
129 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
130 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
131 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
132 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
133
134 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
135 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
136
137 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
138 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
139 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
140 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
141 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
142
143 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
144 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000145
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000146 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
153 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
155 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
156 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
157 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
158
159 # in python2.3 (etc), these loop endlessly in sre_parser.py
160 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
161 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
162 'xz8')
163 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
164 'xza')
165
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000166 def test_qualified_re_sub(self):
167 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
168 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000169
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000170 def test_bug_114660(self):
171 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
172 'hello there')
173
174 def test_bug_462270(self):
175 # Test for empty sub() behaviour, see SF bug #462270
176 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
177 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
178
Ezio Melottief317382012-11-03 20:31:12 +0200179 def test_symbolic_groups(self):
180 re.compile('(?P<a>x)(?P=a)(?(a)y)')
181 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
182 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
183 self.assertRaises(re.error, re.compile, '(?Px)')
184 self.assertRaises(re.error, re.compile, '(?P=)')
185 self.assertRaises(re.error, re.compile, '(?P=1)')
186 self.assertRaises(re.error, re.compile, '(?P=a)')
187 self.assertRaises(re.error, re.compile, '(?P=a1)')
188 self.assertRaises(re.error, re.compile, '(?P=a.)')
189 self.assertRaises(re.error, re.compile, '(?P<)')
190 self.assertRaises(re.error, re.compile, '(?P<>)')
191 self.assertRaises(re.error, re.compile, '(?P<1>)')
192 self.assertRaises(re.error, re.compile, '(?P<a.>)')
193 self.assertRaises(re.error, re.compile, '(?())')
194 self.assertRaises(re.error, re.compile, '(?(a))')
195 self.assertRaises(re.error, re.compile, '(?(1a))')
196 self.assertRaises(re.error, re.compile, '(?(a.))')
197
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000198 def test_symbolic_refs(self):
199 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
200 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
201 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melottief317382012-11-03 20:31:12 +0200203 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000204 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
205 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
206 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
207 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000208 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000209
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000210 def test_re_subn(self):
211 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
212 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
213 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
214 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
215 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000216
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000217 def test_re_split(self):
218 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
219 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
220 self.assertEqual(re.split("(:*)", ":a:b::c"),
221 ['', ':', 'a', ':', 'b', '::', 'c'])
222 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
223 self.assertEqual(re.split("(:)*", ":a:b::c"),
224 ['', ':', 'a', ':', 'b', ':', 'c'])
225 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
226 ['', ':', 'a', ':b::', 'c'])
227 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
228 ['', None, ':', 'a', None, ':', '', 'b', None, '',
229 None, '::', 'c'])
230 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
231 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000232
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000233 def test_qualified_re_split(self):
234 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
235 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
236 self.assertEqual(re.split("(:)", ":a:b::c", 2),
237 ['', ':', 'a', ':', 'b::c'])
238 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
239 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000240
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000241 def test_re_findall(self):
242 self.assertEqual(re.findall(":+", "abc"), [])
243 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
244 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
245 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
246 (":", ":"),
247 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000248
Skip Montanaro5ba00542003-04-25 16:00:14 +0000249 def test_bug_117612(self):
250 self.assertEqual(re.findall(r"(a|(b))", "aba"),
251 [("a", ""),("b", "b"),("a", "")])
252
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000253 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000254 self.assertEqual(re.match('a', 'a').groups(), ())
255 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
256 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
257 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
258 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000259
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000260 pat = re.compile('((a)|(b))(c)?')
261 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
262 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
263 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
264 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
265 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000266
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000267 # A single group
268 m = re.match('(a)', 'a')
269 self.assertEqual(m.group(0), 'a')
270 self.assertEqual(m.group(0), 'a')
271 self.assertEqual(m.group(1), 'a')
272 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000273
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000274 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
275 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
276 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
277 (None, 'b', None))
278 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000279
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000280 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000281 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
282 ('(', 'a'))
283 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
284 (None, 'a'))
285 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
287 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
288 ('a', 'b'))
289 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
290 (None, 'd'))
291 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
292 (None, 'd'))
293 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
294 ('a', ''))
295
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000296 # Tests for bug #1177831: exercise groups other than the first group
297 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
298 self.assertEqual(p.match('abc').groups(),
299 ('a', 'b', 'c'))
300 self.assertEqual(p.match('ad').groups(),
301 ('a', None, 'd'))
302 self.assertEqual(p.match('abd'), None)
303 self.assertEqual(p.match('ac'), None)
304
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000305
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000306 def test_re_groupref(self):
307 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
308 ('|', 'a'))
309 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
310 (None, 'a'))
311 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
313 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
314 ('a', 'a'))
315 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
316 (None, None))
317
318 def test_groupdict(self):
319 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
320 'first second').groupdict(),
321 {'first':'first', 'second':'second'})
322
323 def test_expand(self):
324 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
325 "first second")
326 .expand(r"\2 \1 \g<second> \g<first>"),
327 "second first second first")
328
329 def test_repeat_minmax(self):
330 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
331 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
332 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
333 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
334
335 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
336 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
343
344 self.assertEqual(re.match("^x{1}$", "xxx"), None)
345 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
346 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
347 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
348
349 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
350 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
355 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
356 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
357
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000358 self.assertEqual(re.match("^x{}$", "xxx"), None)
359 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
360
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000361 def test_getattr(self):
362 self.assertEqual(re.match("(a)", "a").pos, 0)
363 self.assertEqual(re.match("(a)", "a").endpos, 1)
364 self.assertEqual(re.match("(a)", "a").string, "a")
365 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
366 self.assertNotEqual(re.match("(a)", "a").re, None)
367
368 def test_special_escapes(self):
369 self.assertEqual(re.search(r"\b(b.)\b",
370 "abcd abc bcd bx").group(1), "bx")
371 self.assertEqual(re.search(r"\B(b.)\B",
372 "abc bcd bc abxd").group(1), "bx")
373 self.assertEqual(re.search(r"\b(b.)\b",
374 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
375 self.assertEqual(re.search(r"\B(b.)\B",
376 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
377 self.assertEqual(re.search(r"\b(b.)\b",
378 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
379 self.assertEqual(re.search(r"\B(b.)\B",
380 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
381 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
382 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
383 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
384 self.assertEqual(re.search(r"\b(b.)\b",
385 u"abcd abc bcd bx").group(1), "bx")
386 self.assertEqual(re.search(r"\B(b.)\B",
387 u"abc bcd bc abxd").group(1), "bx")
388 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
389 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
390 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
391 self.assertEqual(re.search(r"\d\D\w\W\s\S",
392 "1aa! a").group(0), "1aa! a")
393 self.assertEqual(re.search(r"\d\D\w\W\s\S",
394 "1aa! a", re.LOCALE).group(0), "1aa! a")
395 self.assertEqual(re.search(r"\d\D\w\W\s\S",
396 "1aa! a", re.UNICODE).group(0), "1aa! a")
397
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200398 def test_string_boundaries(self):
399 # See http://bugs.python.org/issue10713
400 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
401 "abc")
402 # There's a word boundary at the start of a string.
403 self.assertTrue(re.match(r"\b", "abc"))
404 # A non-empty string includes a non-boundary zero-length match.
405 self.assertTrue(re.search(r"\B", "abc"))
406 # There is no non-boundary match at the start of a string.
407 self.assertFalse(re.match(r"\B", "abc"))
408 # However, an empty string contains no word boundaries, and also no
409 # non-boundaries.
410 self.assertEqual(re.search(r"\B", ""), None)
411 # This one is questionable and different from the perlre behaviour,
412 # but describes current behavior.
413 self.assertEqual(re.search(r"\b", ""), None)
414 # A single word-character string has two boundaries, but no
415 # non-boundary gaps.
416 self.assertEqual(len(re.findall(r"\b", "a")), 2)
417 self.assertEqual(len(re.findall(r"\B", "a")), 0)
418 # If there are no words, there are no boundaries
419 self.assertEqual(len(re.findall(r"\b", " ")), 0)
420 self.assertEqual(len(re.findall(r"\b", " ")), 0)
421 # Can match around the whitespace.
422 self.assertEqual(len(re.findall(r"\B", " ")), 2)
423
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000424 def test_bigcharset(self):
425 self.assertEqual(re.match(u"([\u2222\u2223])",
426 u"\u2222").group(1), u"\u2222")
427 self.assertEqual(re.match(u"([\u2222\u2223])",
428 u"\u2222", re.UNICODE).group(1), u"\u2222")
429
Antoine Pitroub83ea142012-11-20 22:30:42 +0100430 def test_big_codesize(self):
431 # Issue #1160
432 r = re.compile('|'.join(('%d'%x for x in range(10000))))
433 self.assertIsNotNone(r.match('1000'))
434 self.assertIsNotNone(r.match('9999'))
435
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000436 def test_anyall(self):
437 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
438 "a\nb")
439 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
440 "a\n\nb")
441
442 def test_non_consuming(self):
443 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
444 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
445 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
446 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
447 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
448 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
449 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
450
451 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
452 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
453 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
454 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
455
456 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000457 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
458 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000459 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
460 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
461 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
462 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
463 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
464 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
465 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
466 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
467
468 def test_category(self):
469 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
470
471 def test_getlower(self):
472 import _sre
473 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
474 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
475 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
476
477 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
478 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
479
480 def test_not_literal(self):
481 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
482 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
483
484 def test_search_coverage(self):
485 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
486 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
487
Ezio Melotti46645632011-03-25 14:50:52 +0200488 def assertMatch(self, pattern, text, match=None, span=None,
489 matcher=re.match):
490 if match is None and span is None:
491 # the pattern matches the whole text
492 match = text
493 span = (0, len(text))
494 elif match is None or span is None:
495 raise ValueError('If match is not None, span should be specified '
496 '(and vice versa).')
497 m = matcher(pattern, text)
498 self.assertTrue(m)
499 self.assertEqual(m.group(), match)
500 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000501
Ezio Melotti46645632011-03-25 14:50:52 +0200502 def test_re_escape(self):
503 alnum_chars = string.ascii_letters + string.digits
504 p = u''.join(unichr(i) for i in range(256))
505 for c in p:
506 if c in alnum_chars:
507 self.assertEqual(re.escape(c), c)
508 elif c == u'\x00':
509 self.assertEqual(re.escape(c), u'\\000')
510 else:
511 self.assertEqual(re.escape(c), u'\\' + c)
512 self.assertMatch(re.escape(c), c)
513 self.assertMatch(re.escape(p), p)
514
515 def test_re_escape_byte(self):
516 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
517 p = ''.join(chr(i) for i in range(256))
518 for b in p:
519 if b in alnum_chars:
520 self.assertEqual(re.escape(b), b)
521 elif b == b'\x00':
522 self.assertEqual(re.escape(b), b'\\000')
523 else:
524 self.assertEqual(re.escape(b), b'\\' + b)
525 self.assertMatch(re.escape(b), b)
526 self.assertMatch(re.escape(p), p)
527
528 def test_re_escape_non_ascii(self):
529 s = u'xxx\u2620\u2620\u2620xxx'
530 s_escaped = re.escape(s)
531 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
532 self.assertMatch(s_escaped, s)
533 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
534 u'x\u2620\u2620\u2620x', (2, 7), re.search)
535
536 def test_re_escape_non_ascii_bytes(self):
537 b = u'y\u2620y\u2620y'.encode('utf-8')
538 b_escaped = re.escape(b)
539 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
540 self.assertMatch(b_escaped, b)
541 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
542 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000543
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000544 def test_pickling(self):
545 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000546 self.pickle_test(pickle)
547 import cPickle
548 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000549 # old pickles expect the _compile() reconstructor in sre module
Florent Xicluna6257a7b2010-03-31 22:01:03 +0000550 import_module("sre", deprecated=True)
551 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000552
553 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000554 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
555 s = pickle.dumps(oldpat)
556 newpat = pickle.loads(s)
557 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000558
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000559 def test_constants(self):
560 self.assertEqual(re.I, re.IGNORECASE)
561 self.assertEqual(re.L, re.LOCALE)
562 self.assertEqual(re.M, re.MULTILINE)
563 self.assertEqual(re.S, re.DOTALL)
564 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000565
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000566 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000567 for flag in [re.I, re.M, re.X, re.S, re.L]:
568 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000569
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000570 def test_sre_character_literals(self):
571 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
572 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
573 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
574 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
575 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
576 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
577 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
578 self.assertRaises(re.error, re.match, "\911", "")
579
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000580 def test_sre_character_class_literals(self):
581 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
582 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
583 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
584 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
585 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
586 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
587 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
588 self.assertRaises(re.error, re.match, "[\911]", "")
589
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000590 def test_bug_113254(self):
591 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
592 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
593 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
594
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000595 def test_bug_527371(self):
596 # bug described in patches 527371/672491
597 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
598 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
599 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
600 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
601 self.assertEqual(re.match("((a))", "a").lastindex, 1)
602
603 def test_bug_545855(self):
604 # bug 545855 -- This pattern failed to cause a compile error as it
605 # should, instead provoking a TypeError.
606 self.assertRaises(re.error, re.compile, 'foo[a-')
607
608 def test_bug_418626(self):
609 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
610 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
611 # pattern '*?' on a long string.
612 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
613 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
614 20003)
615 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000616 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000617 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000618 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000619
620 def test_bug_612074(self):
621 pat=u"["+re.escape(u"\u2039")+u"]"
622 self.assertEqual(re.compile(pat) and 1, 1)
623
Skip Montanaro1e703c62003-04-25 15:40:28 +0000624 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000625 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000626 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000627 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
628 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
629 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000630
631 def test_scanner(self):
632 def s_ident(scanner, token): return token
633 def s_operator(scanner, token): return "op%s" % token
634 def s_float(scanner, token): return float(token)
635 def s_int(scanner, token): return int(token)
636
637 scanner = Scanner([
638 (r"[a-zA-Z_]\w*", s_ident),
639 (r"\d+\.\d*", s_float),
640 (r"\d+", s_int),
641 (r"=|\+|-|\*|/", s_operator),
642 (r"\s+", None),
643 ])
644
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000645 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
646
Skip Montanaro1e703c62003-04-25 15:40:28 +0000647 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
648 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
649 'op+', 'bar'], ''))
650
Skip Montanaro5ba00542003-04-25 16:00:14 +0000651 def test_bug_448951(self):
652 # bug 448951 (similar to 429357, but with single char match)
653 # (Also test greedy matches.)
654 for op in '','?','*':
655 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
656 (None, None))
657 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
658 ('a:', 'a'))
659
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000660 def test_bug_725106(self):
661 # capturing groups in alternatives in repeats
662 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
663 ('b', 'a'))
664 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
665 ('c', 'b'))
666 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
667 ('b', None))
668 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
669 ('b', None))
670 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
671 ('b', 'a'))
672 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
673 ('c', 'b'))
674 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
675 ('b', None))
676 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
677 ('b', None))
678
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000679 def test_bug_725149(self):
680 # mark_stack_base restoring before restoring marks
681 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
682 ('a', None))
683 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
684 ('a', None, None))
685
Just van Rossum12723ba2003-07-02 20:03:04 +0000686 def test_bug_764548(self):
687 # bug 764548, re.compile() barfs on str/unicode subclasses
688 try:
689 unicode
690 except NameError:
691 return # no problem if we have no unicode
692 class my_unicode(unicode): pass
693 pat = re.compile(my_unicode("abc"))
694 self.assertEqual(pat.match("xyz"), None)
695
Skip Montanaro5ba00542003-04-25 16:00:14 +0000696 def test_finditer(self):
697 iter = re.finditer(r":+", "a:b::c:::d")
698 self.assertEqual([item.group(0) for item in iter],
699 [":", "::", ":::"])
700
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000701 def test_bug_926075(self):
702 try:
703 unicode
704 except NameError:
705 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000706 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000707 re.compile(eval("u'bug_926075'")))
708
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000709 def test_bug_931848(self):
710 try:
711 unicode
712 except NameError:
713 pass
714 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
715 self.assertEqual(re.compile(pattern).split("a.b.c"),
716 ['a','b','c'])
717
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000718 def test_bug_581080(self):
719 iter = re.finditer(r"\s", "a b")
720 self.assertEqual(iter.next().span(), (1,2))
721 self.assertRaises(StopIteration, iter.next)
722
723 scanner = re.compile(r"\s").scanner("a b")
724 self.assertEqual(scanner.search().span(), (1, 2))
725 self.assertEqual(scanner.search(), None)
726
727 def test_bug_817234(self):
728 iter = re.finditer(r".*", "asdf")
729 self.assertEqual(iter.next().span(), (0, 4))
730 self.assertEqual(iter.next().span(), (4, 4))
731 self.assertRaises(StopIteration, iter.next)
732
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000733 def test_bug_6561(self):
734 # '\d' should match characters in Unicode category 'Nd'
735 # (Number, Decimal Digit), but not those in 'Nl' (Number,
736 # Letter) or 'No' (Number, Other).
737 decimal_digits = [
738 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
739 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
740 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
741 ]
742 for x in decimal_digits:
743 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
744
745 not_decimal_digits = [
746 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
747 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
748 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
749 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
750 ]
751 for x in not_decimal_digits:
752 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
753
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000754 def test_empty_array(self):
755 # SF buf 1647541
756 import array
757 for typecode in 'cbBuhHiIlLfd':
758 a = array.array(typecode)
759 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000760 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000761
Guido van Rossumae04c332008-01-03 19:12:44 +0000762 def test_inline_flags(self):
763 # Bug #1700
764 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
765 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
766
767 p = re.compile(upper_char, re.I | re.U)
768 q = p.match(lower_char)
769 self.assertNotEqual(q, None)
770
771 p = re.compile(lower_char, re.I | re.U)
772 q = p.match(upper_char)
773 self.assertNotEqual(q, None)
774
775 p = re.compile('(?i)' + upper_char, re.U)
776 q = p.match(lower_char)
777 self.assertNotEqual(q, None)
778
779 p = re.compile('(?i)' + lower_char, re.U)
780 q = p.match(upper_char)
781 self.assertNotEqual(q, None)
782
783 p = re.compile('(?iu)' + upper_char)
784 q = p.match(lower_char)
785 self.assertNotEqual(q, None)
786
787 p = re.compile('(?iu)' + lower_char)
788 q = p.match(upper_char)
789 self.assertNotEqual(q, None)
790
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000791 def test_dollar_matches_twice(self):
792 "$ matches the end of string, and just before the terminating \n"
793 pattern = re.compile('$')
794 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
795 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
796 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
797
798 pattern = re.compile('$', re.MULTILINE)
799 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
800 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
801 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
802
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000803 def test_dealloc(self):
804 # issue 3299: check for segfault in debug build
805 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000806 # the overflow limit is different on wide and narrow builds and it
807 # depends on the definition of SRE_CODE (see sre.h).
808 # 2**128 should be big enough to overflow on both. For smaller values
809 # a RuntimeError is raised instead of OverflowError.
810 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000811 self.assertRaises(TypeError, re.finditer, "a", {})
812 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000813
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200814 def test_compile(self):
815 # Test return value when given string and pattern as parameter
816 pattern = re.compile('random pattern')
817 self.assertIsInstance(pattern, re._pattern_type)
818 same_pattern = re.compile(pattern)
819 self.assertIsInstance(same_pattern, re._pattern_type)
820 self.assertIs(same_pattern, pattern)
821 # Test behaviour when not given a string or pattern as parameter
822 self.assertRaises(TypeError, re.compile, 0)
823
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # '$' can only match at the very end, so both ends of the match must
    # equal the full length of the string.
    haystack = 'a' * size
    found = re.search('$', haystack)
    self.assertIsNotNone(found)
    self.assertEqual(found.start(), size)
    self.assertEqual(found.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100832
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # The empty pattern matches at every position, hence size + 1
    # substitutions, each of which leaves the text unchanged.
    text = 'a' * size
    replaced, count = re.subn('', '', text)
    self.assertEqual(replaced, text)
    self.assertEqual(count, size + 1)
842
843
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000844def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000845 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000846 if verbose:
847 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000848 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000849 # To save time, only run the first and last 10 tests
850 #tests = tests[:10] + tests[-10:]
851 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000852
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000853 for t in tests:
854 sys.stdout.flush()
855 pattern = s = outcome = repl = expected = None
856 if len(t) == 5:
857 pattern, s, outcome, repl, expected = t
858 elif len(t) == 3:
859 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000860 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000861 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
862
Guido van Rossum41360a41998-03-26 19:42:58 +0000863 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000864 obj = re.compile(pattern)
865 except re.error:
866 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000867 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000868 print '=== Syntax error:', t
869 except KeyboardInterrupt: raise KeyboardInterrupt
870 except:
871 print '*** Unexpected error ***', t
872 if verbose:
873 traceback.print_exc(file=sys.stdout)
874 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000875 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000876 result = obj.search(s)
877 except re.error, msg:
878 print '=== Unexpected exception', t, repr(msg)
879 if outcome == SYNTAX_ERROR:
880 # This should have been a syntax error; forget it.
881 pass
882 elif outcome == FAIL:
883 if result is None: pass # No match, as expected
884 else: print '=== Succeeded incorrectly', t
885 elif outcome == SUCCEED:
886 if result is not None:
887 # Matched, as expected, so now we compute the
888 # result string and compare it to our expected result.
889 start, end = result.span(0)
890 vardict={'found': result.group(0),
891 'groups': result.group(),
892 'flags': result.re.flags}
893 for i in range(1, 100):
894 try:
895 gi = result.group(i)
896 # Special hack because else the string concat fails:
897 if gi is None:
898 gi = "None"
899 except IndexError:
900 gi = "Error"
901 vardict['g%d' % i] = gi
902 for i in result.re.groupindex.keys():
903 try:
904 gi = result.group(i)
905 if gi is None:
906 gi = "None"
907 except IndexError:
908 gi = "Error"
909 vardict[i] = gi
910 repl = eval(repl, vardict)
911 if repl != expected:
912 print '=== grouping error', t,
913 print repr(repl) + ' should be ' + repr(expected)
914 else:
915 print '=== Failed incorrectly', t
916
917 # Try the match on a unicode string, and check that it
918 # still succeeds.
919 try:
920 result = obj.search(unicode(s, "latin-1"))
921 if result is None:
922 print '=== Fails on unicode match', t
923 except NameError:
924 continue # 1.5.2
925 except TypeError:
926 continue # unicode test case
927
928 # Try the match on a unicode pattern, and check that it
929 # still succeeds.
930 obj=re.compile(unicode(pattern, "latin-1"))
931 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000932 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000933 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000934
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000935 # Try the match with the search area limited to the extent
936 # of the match and see if it still succeeds. \B will
937 # break (because it won't match at the end or start of a
938 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000939
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000940 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
941 and result is not None:
942 obj = re.compile(pattern)
943 result = obj.search(s, result.start(0), result.end(0) + 1)
944 if result is None:
945 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000946
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000947 # Try the match with IGNORECASE enabled, and check that it
948 # still succeeds.
949 obj = re.compile(pattern, re.IGNORECASE)
950 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000951 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000952 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000953
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000954 # Try the match with LOCALE enabled, and check that it
955 # still succeeds.
956 obj = re.compile(pattern, re.LOCALE)
957 result = obj.search(s)
958 if result is None:
959 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000960
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000961 # Try the match with UNICODE locale enabled, and check
962 # that it still succeeds.
963 obj = re.compile(pattern, re.UNICODE)
964 result = obj.search(s)
965 if result is None:
966 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000967
def test_main():
    """Run the ReTests unittest cases, then the run_re_tests() checks."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000971
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()