blob: 0bceaa292fb40e7a78b1ff28edacac55db9ef03a [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00003from re import Scanner
Ezio Melotti46645632011-03-25 14:50:52 +02004import sys
5import string
6import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00007from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00008
Guido van Rossum23b22571997-07-17 22:36:14 +00009# Misc tests from Tim Peters' re.doc
10
Just van Rossum6802c6e2003-07-02 14:36:59 +000011# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020012# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000013# cover most of the code.
14
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000016
Skip Montanaro8ed06da2003-04-24 19:43:18 +000017class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000018
19 def test_weakref(self):
20 s = 'QabbbcR'
21 x = re.compile('ab+c')
22 y = proxy(x)
23 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
24
Skip Montanaro8ed06da2003-04-24 19:43:18 +000025 def test_search_star_plus(self):
26 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
27 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
28 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
29 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000030 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000031 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
32 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
33 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
34 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000035 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000036
Skip Montanaro8ed06da2003-04-24 19:43:18 +000037 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000038 int_value = int(matchobj.group(0))
39 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000040
Skip Montanaro8ed06da2003-04-24 19:43:18 +000041 def test_basic_re_sub(self):
42 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
43 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
44 '9.3 -3 24x100y')
45 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
46 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000047
Skip Montanaro8ed06da2003-04-24 19:43:18 +000048 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
49 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000050
Skip Montanaro8ed06da2003-04-24 19:43:18 +000051 s = r"\1\1"
52 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
53 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
54 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000055
Skip Montanaro8ed06da2003-04-24 19:43:18 +000056 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
57 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000060
Skip Montanaro8ed06da2003-04-24 19:43:18 +000061 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
62 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
63 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
65 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000066
Skip Montanaro8ed06da2003-04-24 19:43:18 +000067 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000068
Skip Montanaro2726fcd2003-04-25 14:31:54 +000069 def test_bug_449964(self):
70 # fails for group followed by other escape
71 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
72 'xx\bxx\b')
73
74 def test_bug_449000(self):
75 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000076 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n')
78 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000084
Guido van Rossum1ff91d92007-09-10 22:02:25 +000085 def test_bug_1140(self):
86 # re.sub(x, y, u'') should return u'', not '', and
87 # re.sub(x, y, '') should return '', not u''.
88 # Also:
89 # re.sub(x, y, unicode(x)) should return unicode(y), and
90 # re.sub(x, y, str(x)) should return
91 # str(y) if isinstance(y, str) else unicode(y).
92 for x in 'x', u'x':
93 for y in 'y', u'y':
94 z = re.sub(x, y, u'')
95 self.assertEqual(z, u'')
96 self.assertEqual(type(z), unicode)
97 #
98 z = re.sub(x, y, '')
99 self.assertEqual(z, '')
100 self.assertEqual(type(z), str)
101 #
102 z = re.sub(x, y, unicode(x))
103 self.assertEqual(z, y)
104 self.assertEqual(type(z), unicode)
105 #
106 z = re.sub(x, y, str(x))
107 self.assertEqual(z, y)
108 self.assertEqual(type(z), type(y))
109
Raymond Hettinger80016c92007-12-19 18:13:31 +0000110 def test_bug_1661(self):
111 # Verify that flags do not get silently ignored with compiled patterns
112 pattern = re.compile('.')
113 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
114 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
115 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.compile, pattern, re.I)
117
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000118 def test_bug_3629(self):
119 # A regex that triggered a bug in the sre-code validator
120 re.compile("(?P<quote>)(?(quote))")
121
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000122 def test_sub_template_numeric_escape(self):
123 # bug 776311 and friends
124 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
125 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
126 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
127 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
128 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
129 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
130 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
131
132 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
133 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
134
135 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
136 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
137 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
138 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
139 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
140
141 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
142 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000143
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000144 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
145 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
153 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
155 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
156
157 # in python2.3 (etc), these loop endlessly in sre_parser.py
158 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
159 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
160 'xz8')
161 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
162 'xza')
163
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000164 def test_qualified_re_sub(self):
165 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
166 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000167
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000168 def test_bug_114660(self):
169 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
170 'hello there')
171
172 def test_bug_462270(self):
173 # Test for empty sub() behaviour, see SF bug #462270
174 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
175 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
176
Ezio Melottief317382012-11-03 20:31:12 +0200177 def test_symbolic_groups(self):
178 re.compile('(?P<a>x)(?P=a)(?(a)y)')
179 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
180 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
181 self.assertRaises(re.error, re.compile, '(?Px)')
182 self.assertRaises(re.error, re.compile, '(?P=)')
183 self.assertRaises(re.error, re.compile, '(?P=1)')
184 self.assertRaises(re.error, re.compile, '(?P=a)')
185 self.assertRaises(re.error, re.compile, '(?P=a1)')
186 self.assertRaises(re.error, re.compile, '(?P=a.)')
187 self.assertRaises(re.error, re.compile, '(?P<)')
188 self.assertRaises(re.error, re.compile, '(?P<>)')
189 self.assertRaises(re.error, re.compile, '(?P<1>)')
190 self.assertRaises(re.error, re.compile, '(?P<a.>)')
191 self.assertRaises(re.error, re.compile, '(?())')
192 self.assertRaises(re.error, re.compile, '(?(a))')
193 self.assertRaises(re.error, re.compile, '(?(1a))')
194 self.assertRaises(re.error, re.compile, '(?(a.))')
195
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000196 def test_symbolic_refs(self):
197 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
198 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
199 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
200 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melottief317382012-11-03 20:31:12 +0200201 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
203 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
204 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
205 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000206 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000207
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000208 def test_re_subn(self):
209 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
210 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
211 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
212 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
213 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000214
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000215 def test_re_split(self):
216 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
217 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
218 self.assertEqual(re.split("(:*)", ":a:b::c"),
219 ['', ':', 'a', ':', 'b', '::', 'c'])
220 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
221 self.assertEqual(re.split("(:)*", ":a:b::c"),
222 ['', ':', 'a', ':', 'b', ':', 'c'])
223 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
224 ['', ':', 'a', ':b::', 'c'])
225 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
226 ['', None, ':', 'a', None, ':', '', 'b', None, '',
227 None, '::', 'c'])
228 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
229 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000230
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000231 def test_qualified_re_split(self):
232 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
233 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
234 self.assertEqual(re.split("(:)", ":a:b::c", 2),
235 ['', ':', 'a', ':', 'b::c'])
236 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
237 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000238
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000239 def test_re_findall(self):
240 self.assertEqual(re.findall(":+", "abc"), [])
241 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
242 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
243 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
244 (":", ":"),
245 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000246
Skip Montanaro5ba00542003-04-25 16:00:14 +0000247 def test_bug_117612(self):
248 self.assertEqual(re.findall(r"(a|(b))", "aba"),
249 [("a", ""),("b", "b"),("a", "")])
250
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000251 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000252 self.assertEqual(re.match('a', 'a').groups(), ())
253 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
254 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
255 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
256 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000257
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000258 pat = re.compile('((a)|(b))(c)?')
259 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
260 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
261 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
262 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
263 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000264
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000265 # A single group
266 m = re.match('(a)', 'a')
267 self.assertEqual(m.group(0), 'a')
268 self.assertEqual(m.group(0), 'a')
269 self.assertEqual(m.group(1), 'a')
270 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000271
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000272 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
273 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
274 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
275 (None, 'b', None))
276 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000277
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000278 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000279 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
280 ('(', 'a'))
281 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
282 (None, 'a'))
283 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
285 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
286 ('a', 'b'))
287 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
288 (None, 'd'))
289 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
290 (None, 'd'))
291 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
292 ('a', ''))
293
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000294 # Tests for bug #1177831: exercise groups other than the first group
295 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
296 self.assertEqual(p.match('abc').groups(),
297 ('a', 'b', 'c'))
298 self.assertEqual(p.match('ad').groups(),
299 ('a', None, 'd'))
300 self.assertEqual(p.match('abd'), None)
301 self.assertEqual(p.match('ac'), None)
302
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000303
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000304 def test_re_groupref(self):
305 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
306 ('|', 'a'))
307 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
308 (None, 'a'))
309 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
311 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
312 ('a', 'a'))
313 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
314 (None, None))
315
316 def test_groupdict(self):
317 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
318 'first second').groupdict(),
319 {'first':'first', 'second':'second'})
320
321 def test_expand(self):
322 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
323 "first second")
324 .expand(r"\2 \1 \g<second> \g<first>"),
325 "second first second first")
326
327 def test_repeat_minmax(self):
328 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
329 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
330 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
331 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
332
333 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
334 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
335 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
336 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
341
342 self.assertEqual(re.match("^x{1}$", "xxx"), None)
343 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
344 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
345 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
346
347 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
348 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
349 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
350 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
355
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000356 self.assertEqual(re.match("^x{}$", "xxx"), None)
357 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
358
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000359 def test_getattr(self):
360 self.assertEqual(re.match("(a)", "a").pos, 0)
361 self.assertEqual(re.match("(a)", "a").endpos, 1)
362 self.assertEqual(re.match("(a)", "a").string, "a")
363 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
364 self.assertNotEqual(re.match("(a)", "a").re, None)
365
366 def test_special_escapes(self):
367 self.assertEqual(re.search(r"\b(b.)\b",
368 "abcd abc bcd bx").group(1), "bx")
369 self.assertEqual(re.search(r"\B(b.)\B",
370 "abc bcd bc abxd").group(1), "bx")
371 self.assertEqual(re.search(r"\b(b.)\b",
372 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
373 self.assertEqual(re.search(r"\B(b.)\B",
374 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
375 self.assertEqual(re.search(r"\b(b.)\b",
376 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
377 self.assertEqual(re.search(r"\B(b.)\B",
378 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
379 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
380 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
381 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
382 self.assertEqual(re.search(r"\b(b.)\b",
383 u"abcd abc bcd bx").group(1), "bx")
384 self.assertEqual(re.search(r"\B(b.)\B",
385 u"abc bcd bc abxd").group(1), "bx")
386 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
387 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
388 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
389 self.assertEqual(re.search(r"\d\D\w\W\s\S",
390 "1aa! a").group(0), "1aa! a")
391 self.assertEqual(re.search(r"\d\D\w\W\s\S",
392 "1aa! a", re.LOCALE).group(0), "1aa! a")
393 self.assertEqual(re.search(r"\d\D\w\W\s\S",
394 "1aa! a", re.UNICODE).group(0), "1aa! a")
395
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200396 def test_string_boundaries(self):
397 # See http://bugs.python.org/issue10713
398 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
399 "abc")
400 # There's a word boundary at the start of a string.
401 self.assertTrue(re.match(r"\b", "abc"))
402 # A non-empty string includes a non-boundary zero-length match.
403 self.assertTrue(re.search(r"\B", "abc"))
404 # There is no non-boundary match at the start of a string.
405 self.assertFalse(re.match(r"\B", "abc"))
406 # However, an empty string contains no word boundaries, and also no
407 # non-boundaries.
408 self.assertEqual(re.search(r"\B", ""), None)
409 # This one is questionable and different from the perlre behaviour,
410 # but describes current behavior.
411 self.assertEqual(re.search(r"\b", ""), None)
412 # A single word-character string has two boundaries, but no
413 # non-boundary gaps.
414 self.assertEqual(len(re.findall(r"\b", "a")), 2)
415 self.assertEqual(len(re.findall(r"\B", "a")), 0)
416 # If there are no words, there are no boundaries
417 self.assertEqual(len(re.findall(r"\b", " ")), 0)
418 self.assertEqual(len(re.findall(r"\b", " ")), 0)
419 # Can match around the whitespace.
420 self.assertEqual(len(re.findall(r"\B", " ")), 2)
421
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000422 def test_bigcharset(self):
423 self.assertEqual(re.match(u"([\u2222\u2223])",
424 u"\u2222").group(1), u"\u2222")
425 self.assertEqual(re.match(u"([\u2222\u2223])",
426 u"\u2222", re.UNICODE).group(1), u"\u2222")
427
428 def test_anyall(self):
429 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
430 "a\nb")
431 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
432 "a\n\nb")
433
434 def test_non_consuming(self):
435 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
436 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
437 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
438 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
439 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
440 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
441 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
442
443 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
444 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
445 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
446 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
447
448 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000449 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
450 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000451 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
452 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
453 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
454 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
455 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
456 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
457 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
458 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
459
460 def test_category(self):
461 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
462
463 def test_getlower(self):
464 import _sre
465 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
466 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
467 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
468
469 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
470 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
471
472 def test_not_literal(self):
473 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
474 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
475
476 def test_search_coverage(self):
477 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
478 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
479
Ezio Melotti46645632011-03-25 14:50:52 +0200480 def assertMatch(self, pattern, text, match=None, span=None,
481 matcher=re.match):
482 if match is None and span is None:
483 # the pattern matches the whole text
484 match = text
485 span = (0, len(text))
486 elif match is None or span is None:
487 raise ValueError('If match is not None, span should be specified '
488 '(and vice versa).')
489 m = matcher(pattern, text)
490 self.assertTrue(m)
491 self.assertEqual(m.group(), match)
492 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000493
Ezio Melotti46645632011-03-25 14:50:52 +0200494 def test_re_escape(self):
495 alnum_chars = string.ascii_letters + string.digits
496 p = u''.join(unichr(i) for i in range(256))
497 for c in p:
498 if c in alnum_chars:
499 self.assertEqual(re.escape(c), c)
500 elif c == u'\x00':
501 self.assertEqual(re.escape(c), u'\\000')
502 else:
503 self.assertEqual(re.escape(c), u'\\' + c)
504 self.assertMatch(re.escape(c), c)
505 self.assertMatch(re.escape(p), p)
506
507 def test_re_escape_byte(self):
508 alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
509 p = ''.join(chr(i) for i in range(256))
510 for b in p:
511 if b in alnum_chars:
512 self.assertEqual(re.escape(b), b)
513 elif b == b'\x00':
514 self.assertEqual(re.escape(b), b'\\000')
515 else:
516 self.assertEqual(re.escape(b), b'\\' + b)
517 self.assertMatch(re.escape(b), b)
518 self.assertMatch(re.escape(p), p)
519
520 def test_re_escape_non_ascii(self):
521 s = u'xxx\u2620\u2620\u2620xxx'
522 s_escaped = re.escape(s)
523 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
524 self.assertMatch(s_escaped, s)
525 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
526 u'x\u2620\u2620\u2620x', (2, 7), re.search)
527
528 def test_re_escape_non_ascii_bytes(self):
529 b = u'y\u2620y\u2620y'.encode('utf-8')
530 b_escaped = re.escape(b)
531 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
532 self.assertMatch(b_escaped, b)
533 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
534 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000535
    def test_pickling(self):
        # Run the pickle round-trip check with both the pickle and the
        # cPickle module.
        import pickle
        self.pickle_test(pickle)
        import cPickle
        self.pickle_test(cPickle)
        # old pickles expect the _compile() reconstructor in sre module
        import_module("sre", deprecated=True)
        # The import is the check itself: it raises ImportError if the sre
        # module no longer exposes _compile.
        from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000544
545 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000546 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
547 s = pickle.dumps(oldpat)
548 newpat = pickle.loads(s)
549 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000550
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000551 def test_constants(self):
552 self.assertEqual(re.I, re.IGNORECASE)
553 self.assertEqual(re.L, re.LOCALE)
554 self.assertEqual(re.M, re.MULTILINE)
555 self.assertEqual(re.S, re.DOTALL)
556 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000557
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000558 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000559 for flag in [re.I, re.M, re.X, re.S, re.L]:
560 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000561
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000562 def test_sre_character_literals(self):
563 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
564 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
565 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
566 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
567 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
568 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
569 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
570 self.assertRaises(re.error, re.match, "\911", "")
571
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000572 def test_sre_character_class_literals(self):
573 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
574 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
575 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
576 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
577 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
578 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
579 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
580 self.assertRaises(re.error, re.match, "[\911]", "")
581
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000582 def test_bug_113254(self):
583 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
584 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
585 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
586
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000587 def test_bug_527371(self):
588 # bug described in patches 527371/672491
589 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
590 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
591 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
592 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
593 self.assertEqual(re.match("((a))", "a").lastindex, 1)
594
595 def test_bug_545855(self):
596 # bug 545855 -- This pattern failed to cause a compile error as it
597 # should, instead provoking a TypeError.
598 self.assertRaises(re.error, re.compile, 'foo[a-')
599
600 def test_bug_418626(self):
601 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
602 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
603 # pattern '*?' on a long string.
604 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
605 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
606 20003)
607 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000608 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000609 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000610 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000611
612 def test_bug_612074(self):
613 pat=u"["+re.escape(u"\u2039")+u"]"
614 self.assertEqual(re.compile(pat) and 1, 1)
615
Skip Montanaro1e703c62003-04-25 15:40:28 +0000616 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000617 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000618 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000619 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
620 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
621 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000622
623 def test_scanner(self):
624 def s_ident(scanner, token): return token
625 def s_operator(scanner, token): return "op%s" % token
626 def s_float(scanner, token): return float(token)
627 def s_int(scanner, token): return int(token)
628
629 scanner = Scanner([
630 (r"[a-zA-Z_]\w*", s_ident),
631 (r"\d+\.\d*", s_float),
632 (r"\d+", s_int),
633 (r"=|\+|-|\*|/", s_operator),
634 (r"\s+", None),
635 ])
636
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000637 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
638
Skip Montanaro1e703c62003-04-25 15:40:28 +0000639 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
640 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
641 'op+', 'bar'], ''))
642
Skip Montanaro5ba00542003-04-25 16:00:14 +0000643 def test_bug_448951(self):
644 # bug 448951 (similar to 429357, but with single char match)
645 # (Also test greedy matches.)
646 for op in '','?','*':
647 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
648 (None, None))
649 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
650 ('a:', 'a'))
651
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000652 def test_bug_725106(self):
653 # capturing groups in alternatives in repeats
654 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
655 ('b', 'a'))
656 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
657 ('c', 'b'))
658 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
659 ('b', None))
660 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
661 ('b', None))
662 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
663 ('b', 'a'))
664 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
665 ('c', 'b'))
666 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
667 ('b', None))
668 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
669 ('b', None))
670
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000671 def test_bug_725149(self):
672 # mark_stack_base restoring before restoring marks
673 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
674 ('a', None))
675 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
676 ('a', None, None))
677
Just van Rossum12723ba2003-07-02 20:03:04 +0000678 def test_bug_764548(self):
679 # bug 764548, re.compile() barfs on str/unicode subclasses
680 try:
681 unicode
682 except NameError:
683 return # no problem if we have no unicode
684 class my_unicode(unicode): pass
685 pat = re.compile(my_unicode("abc"))
686 self.assertEqual(pat.match("xyz"), None)
687
Skip Montanaro5ba00542003-04-25 16:00:14 +0000688 def test_finditer(self):
689 iter = re.finditer(r":+", "a:b::c:::d")
690 self.assertEqual([item.group(0) for item in iter],
691 [":", "::", ":::"])
692
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000693 def test_bug_926075(self):
694 try:
695 unicode
696 except NameError:
697 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000698 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000699 re.compile(eval("u'bug_926075'")))
700
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000701 def test_bug_931848(self):
702 try:
703 unicode
704 except NameError:
705 pass
706 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
707 self.assertEqual(re.compile(pattern).split("a.b.c"),
708 ['a','b','c'])
709
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000710 def test_bug_581080(self):
711 iter = re.finditer(r"\s", "a b")
712 self.assertEqual(iter.next().span(), (1,2))
713 self.assertRaises(StopIteration, iter.next)
714
715 scanner = re.compile(r"\s").scanner("a b")
716 self.assertEqual(scanner.search().span(), (1, 2))
717 self.assertEqual(scanner.search(), None)
718
719 def test_bug_817234(self):
720 iter = re.finditer(r".*", "asdf")
721 self.assertEqual(iter.next().span(), (0, 4))
722 self.assertEqual(iter.next().span(), (4, 4))
723 self.assertRaises(StopIteration, iter.next)
724
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000725 def test_bug_6561(self):
726 # '\d' should match characters in Unicode category 'Nd'
727 # (Number, Decimal Digit), but not those in 'Nl' (Number,
728 # Letter) or 'No' (Number, Other).
729 decimal_digits = [
730 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
731 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
732 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
733 ]
734 for x in decimal_digits:
735 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
736
737 not_decimal_digits = [
738 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
739 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
740 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
741 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
742 ]
743 for x in not_decimal_digits:
744 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
745
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000746 def test_empty_array(self):
747 # SF buf 1647541
748 import array
749 for typecode in 'cbBuhHiIlLfd':
750 a = array.array(typecode)
751 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000752 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000753
Guido van Rossumae04c332008-01-03 19:12:44 +0000754 def test_inline_flags(self):
755 # Bug #1700
756 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
757 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
758
759 p = re.compile(upper_char, re.I | re.U)
760 q = p.match(lower_char)
761 self.assertNotEqual(q, None)
762
763 p = re.compile(lower_char, re.I | re.U)
764 q = p.match(upper_char)
765 self.assertNotEqual(q, None)
766
767 p = re.compile('(?i)' + upper_char, re.U)
768 q = p.match(lower_char)
769 self.assertNotEqual(q, None)
770
771 p = re.compile('(?i)' + lower_char, re.U)
772 q = p.match(upper_char)
773 self.assertNotEqual(q, None)
774
775 p = re.compile('(?iu)' + upper_char)
776 q = p.match(lower_char)
777 self.assertNotEqual(q, None)
778
779 p = re.compile('(?iu)' + lower_char)
780 q = p.match(upper_char)
781 self.assertNotEqual(q, None)
782
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000783 def test_dollar_matches_twice(self):
784 "$ matches the end of string, and just before the terminating \n"
785 pattern = re.compile('$')
786 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
787 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
788 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
789
790 pattern = re.compile('$', re.MULTILINE)
791 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
792 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
793 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
794
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000795 def test_dealloc(self):
796 # issue 3299: check for segfault in debug build
797 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000798 # the overflow limit is different on wide and narrow builds and it
799 # depends on the definition of SRE_CODE (see sre.h).
800 # 2**128 should be big enough to overflow on both. For smaller values
801 # a RuntimeError is raised instead of OverflowError.
802 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000803 self.assertRaises(TypeError, re.finditer, "a", {})
804 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000805
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200806 def test_compile(self):
807 # Test return value when given string and pattern as parameter
808 pattern = re.compile('random pattern')
809 self.assertIsInstance(pattern, re._pattern_type)
810 same_pattern = re.compile(pattern)
811 self.assertIsInstance(same_pattern, re._pattern_type)
812 self.assertIs(same_pattern, pattern)
813 # Test behaviour when not given a string or pattern as parameter
814 self.assertRaises(TypeError, re.compile, 0)
815
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000816def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000817 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000818 if verbose:
819 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000820 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000821 # To save time, only run the first and last 10 tests
822 #tests = tests[:10] + tests[-10:]
823 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000824
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000825 for t in tests:
826 sys.stdout.flush()
827 pattern = s = outcome = repl = expected = None
828 if len(t) == 5:
829 pattern, s, outcome, repl, expected = t
830 elif len(t) == 3:
831 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000832 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000833 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
834
Guido van Rossum41360a41998-03-26 19:42:58 +0000835 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000836 obj = re.compile(pattern)
837 except re.error:
838 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000839 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000840 print '=== Syntax error:', t
841 except KeyboardInterrupt: raise KeyboardInterrupt
842 except:
843 print '*** Unexpected error ***', t
844 if verbose:
845 traceback.print_exc(file=sys.stdout)
846 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000847 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000848 result = obj.search(s)
849 except re.error, msg:
850 print '=== Unexpected exception', t, repr(msg)
851 if outcome == SYNTAX_ERROR:
852 # This should have been a syntax error; forget it.
853 pass
854 elif outcome == FAIL:
855 if result is None: pass # No match, as expected
856 else: print '=== Succeeded incorrectly', t
857 elif outcome == SUCCEED:
858 if result is not None:
859 # Matched, as expected, so now we compute the
860 # result string and compare it to our expected result.
861 start, end = result.span(0)
862 vardict={'found': result.group(0),
863 'groups': result.group(),
864 'flags': result.re.flags}
865 for i in range(1, 100):
866 try:
867 gi = result.group(i)
868 # Special hack because else the string concat fails:
869 if gi is None:
870 gi = "None"
871 except IndexError:
872 gi = "Error"
873 vardict['g%d' % i] = gi
874 for i in result.re.groupindex.keys():
875 try:
876 gi = result.group(i)
877 if gi is None:
878 gi = "None"
879 except IndexError:
880 gi = "Error"
881 vardict[i] = gi
882 repl = eval(repl, vardict)
883 if repl != expected:
884 print '=== grouping error', t,
885 print repr(repl) + ' should be ' + repr(expected)
886 else:
887 print '=== Failed incorrectly', t
888
889 # Try the match on a unicode string, and check that it
890 # still succeeds.
891 try:
892 result = obj.search(unicode(s, "latin-1"))
893 if result is None:
894 print '=== Fails on unicode match', t
895 except NameError:
896 continue # 1.5.2
897 except TypeError:
898 continue # unicode test case
899
900 # Try the match on a unicode pattern, and check that it
901 # still succeeds.
902 obj=re.compile(unicode(pattern, "latin-1"))
903 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000904 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000905 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000906
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000907 # Try the match with the search area limited to the extent
908 # of the match and see if it still succeeds. \B will
909 # break (because it won't match at the end or start of a
910 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000911
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000912 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
913 and result is not None:
914 obj = re.compile(pattern)
915 result = obj.search(s, result.start(0), result.end(0) + 1)
916 if result is None:
917 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000918
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000919 # Try the match with IGNORECASE enabled, and check that it
920 # still succeeds.
921 obj = re.compile(pattern, re.IGNORECASE)
922 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000923 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000924 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000925
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000926 # Try the match with LOCALE enabled, and check that it
927 # still succeeds.
928 obj = re.compile(pattern, re.LOCALE)
929 result = obj.search(s)
930 if result is None:
931 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000932
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000933 # Try the match with UNICODE locale enabled, and check
934 # that it still succeeds.
935 obj = re.compile(pattern, re.UNICODE)
936 result = obj.search(s)
937 if result is None:
938 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000939
def test_main():
    """Run the ReTests unittest cases, then the re_tests table-driven suite."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000943
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()