blob: 7a25dbeec8810f32cd360736a57c3e4011f87ff9 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Serhiy Storchaka7644ff12014-09-14 17:40:44 +03003from test.test_support import captured_stdout, have_unicode, requires_unicode, u
Serhiy Storchakad4c72902014-10-31 00:53:19 +02004import locale
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00006from re import Scanner
R David Murray60773392013-04-14 13:08:50 -04007import sre_constants
Ezio Melotti46645632011-03-25 14:50:52 +02008import sys
9import string
10import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +000011from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000012
Antoine Pitrou735f36e2012-12-03 20:53:12 +010013
Guido van Rossum23b22571997-07-17 22:36:14 +000014# Misc tests from Tim Peters' re.doc
15
Just van Rossum6802c6e2003-07-02 14:36:59 +000016# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020017# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000018# cover most of the code.
19
Skip Montanaro8ed06da2003-04-24 19:43:18 +000020import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000021
Skip Montanaro8ed06da2003-04-24 19:43:18 +000022class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000023
24 def test_weakref(self):
25 s = 'QabbbcR'
26 x = re.compile('ab+c')
27 y = proxy(x)
28 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
29
Skip Montanaro8ed06da2003-04-24 19:43:18 +000030 def test_search_star_plus(self):
31 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
32 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
33 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
34 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030035 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000036 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
37 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
38 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
39 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030040 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000041
def bump_num(self, matchobj):
    # Replacement callback: read the whole match as an integer and
    # return that integer plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000045
def test_basic_re_sub(self):
    # Basic re.sub() behaviour: inline flags, callable replacements,
    # an optional count, and escape handling in replacement templates.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # The return value of a callable is inserted verbatim, whereas a
    # string template has its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Templates are raw strings so that "\g" reaches re untouched instead
    # of relying on Python leaving an unknown string escape alone.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000073
Skip Montanaro2726fcd2003-04-25 14:31:54 +000074 def test_bug_449964(self):
75 # fails for group followed by other escape
76 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
77 'xx\bxx\b')
78
79 def test_bug_449000(self):
80 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000081 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
85 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
86 'abc\ndef\n')
87 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
88 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000089
@requires_unicode
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    for pattern in ('x', u'x'):
        for repl in ('y', u'y'):
            # Empty subject: value and type of the subject are preserved.
            for empty in (u'', ''):
                result = re.sub(pattern, repl, empty)
                self.assertEqual(result, empty)
                self.assertEqual(type(result), type(empty))
            # unicode subject: the result is always unicode.
            result = re.sub(pattern, repl, unicode(pattern))
            self.assertEqual(result, repl)
            self.assertEqual(type(result), unicode)
            # str subject: the result takes the replacement's type.
            result = re.sub(pattern, repl, str(pattern))
            self.assertEqual(result, repl)
            self.assertEqual(type(result), type(repl))
115
Raymond Hettinger80016c92007-12-19 18:13:31 +0000116 def test_bug_1661(self):
117 # Verify that flags do not get silently ignored with compiled patterns
118 pattern = re.compile('.')
119 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
120 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
121 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
122 self.assertRaises(ValueError, re.compile, pattern, re.I)
123
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000124 def test_bug_3629(self):
125 # A regex that triggered a bug in the sre-code validator
126 re.compile("(?P<quote>)(?(quote))")
127
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # Each template replaces the single 'x' and must yield `expected`.
    octal_cases = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),

        (r'\400', '\0'),
        (r'\777', '\377'),
    ]
    for template, expected in octal_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Templates that are rejected when the pattern has no groups.
    rejected = [
        r'\1', r'\8', r'\9', r'\11', r'\18', r'\1a', r'\90', r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in rejected:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    eleven_groups = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(eleven_groups, r'\118', 'xyz'), 'xz8')
    self.assertEqual(re.sub(eleven_groups, r'\11a', 'xyz'), 'xza')
169
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000170 def test_qualified_re_sub(self):
171 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
172 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000173
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000174 def test_bug_114660(self):
175 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
176 'hello there')
177
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    for pattern, expected in (('x*', '-a-b-d-'), ('x+', 'ab-d')):
        self.assertEqual(re.sub(pattern, '-', 'abxd'), expected)
182
Ezio Melottief317382012-11-03 20:31:12 +0200183 def test_symbolic_groups(self):
184 re.compile('(?P<a>x)(?P=a)(?(a)y)')
185 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
186 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
187 self.assertRaises(re.error, re.compile, '(?Px)')
188 self.assertRaises(re.error, re.compile, '(?P=)')
189 self.assertRaises(re.error, re.compile, '(?P=1)')
190 self.assertRaises(re.error, re.compile, '(?P=a)')
191 self.assertRaises(re.error, re.compile, '(?P=a1)')
192 self.assertRaises(re.error, re.compile, '(?P=a.)')
193 self.assertRaises(re.error, re.compile, '(?P<)')
194 self.assertRaises(re.error, re.compile, '(?P<>)')
195 self.assertRaises(re.error, re.compile, '(?P<1>)')
196 self.assertRaises(re.error, re.compile, '(?P<a.>)')
197 self.assertRaises(re.error, re.compile, '(?())')
198 self.assertRaises(re.error, re.compile, '(?(a))')
199 self.assertRaises(re.error, re.compile, '(?(1a))')
200 self.assertRaises(re.error, re.compile, '(?(a.))')
201
def test_symbolic_refs(self):
    # Malformed or unusable group references in a replacement template
    # must raise.  Templates are raw strings so the backslash is passed
    # to re explicitly rather than via an unknown Python string escape.
    rejected_templates = [
        r'\g<a', r'\g<', r'\g', r'\g<a a>', r'\g<>', r'\g<1a1>', r'\g<-1>',
    ]
    for template in rejected_templates:
        self.assertRaises(re.error, re.sub, '(?P<a>x)', template, 'xx')
    # A well-formed reference to a group name the pattern does not define.
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')
    # A reference (by name or by number) to a group that exists in the
    # pattern but did not take part in the match.
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\2', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000213
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000214 def test_re_subn(self):
215 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
216 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
217 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
218 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
219 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000220
def test_re_split(self):
    # Every pattern is applied to the same subject; capturing groups put
    # the separators (or None for a non-participating group) in the result.
    subject = ":a:b::c"
    expectations = [
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]
    for pattern, pieces in expectations:
        self.assertEqual(re.split(pattern, subject), pieces)
Guido van Rossum49946571997-07-18 04:26:25 +0000236
def test_qualified_re_split(self):
    # With maxsplit=2 the remainder of the subject is returned unsplit
    # as the last element.
    cases = [
        (":", ":a:b::c", ['', 'a', 'b::c']),
        (':', 'a:b:c:d', ['a', 'b', 'c:d']),
        ("(:)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
        ("(:*)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
    ]
    for pattern, subject, pieces in cases:
        self.assertEqual(re.split(pattern, subject, 2), pieces)
Guido van Rossum49946571997-07-18 04:26:25 +0000244
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000245 def test_re_findall(self):
246 self.assertEqual(re.findall(":+", "abc"), [])
247 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
248 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
249 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
250 (":", ":"),
251 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000252
Skip Montanaro5ba00542003-04-25 16:00:14 +0000253 def test_bug_117612(self):
254 self.assertEqual(re.findall(r"(a|(b))", "aba"),
255 [("a", ""),("b", "b"),("a", "")])
256
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000257 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000258 self.assertEqual(re.match('a', 'a').groups(), ())
259 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
260 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
261 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
262 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000263
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000264 pat = re.compile('((a)|(b))(c)?')
265 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
266 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
267 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
268 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
269 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000270
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000271 # A single group
272 m = re.match('(a)', 'a')
273 self.assertEqual(m.group(0), 'a')
274 self.assertEqual(m.group(0), 'a')
275 self.assertEqual(m.group(1), 'a')
276 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000277
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000278 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
279 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
280 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
281 (None, 'b', None))
282 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000283
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000284 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000285 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
286 ('(', 'a'))
287 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
288 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300289 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
290 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000291 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
292 ('a', 'b'))
293 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
294 (None, 'd'))
295 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
296 (None, 'd'))
297 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
298 ('a', ''))
299
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000300 # Tests for bug #1177831: exercise groups other than the first group
301 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
302 self.assertEqual(p.match('abc').groups(),
303 ('a', 'b', 'c'))
304 self.assertEqual(p.match('ad').groups(),
305 ('a', None, 'd'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300306 self.assertIsNone(p.match('abd'))
307 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000308
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000309
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000310 def test_re_groupref(self):
311 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
312 ('|', 'a'))
313 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
314 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300315 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
316 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000317 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
318 ('a', 'a'))
319 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
320 (None, None))
321
322 def test_groupdict(self):
323 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
324 'first second').groupdict(),
325 {'first':'first', 'second':'second'})
326
327 def test_expand(self):
328 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
329 "first second")
330 .expand(r"\2 \1 \g<second> \g<first>"),
331 "second first second first")
332
333 def test_repeat_minmax(self):
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300334 self.assertIsNone(re.match("^(\w){1}$", "abc"))
335 self.assertIsNone(re.match("^(\w){1}?$", "abc"))
336 self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
337 self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000338
339 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
344 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
345 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
346 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
347
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300348 self.assertIsNone(re.match("^x{1}$", "xxx"))
349 self.assertIsNone(re.match("^x{1}?$", "xxx"))
350 self.assertIsNone(re.match("^x{1,2}$", "xxx"))
351 self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000352
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300353 self.assertTrue(re.match("^x{3}$", "xxx"))
354 self.assertTrue(re.match("^x{1,3}$", "xxx"))
355 self.assertTrue(re.match("^x{1,4}$", "xxx"))
356 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
357 self.assertTrue(re.match("^x{3}?$", "xxx"))
358 self.assertTrue(re.match("^x{1,3}?$", "xxx"))
359 self.assertTrue(re.match("^x{1,4}?$", "xxx"))
360 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000361
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300362 self.assertIsNone(re.match("^x{}$", "xxx"))
363 self.assertTrue(re.match("^x{}$", "x{}"))
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000364
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000365 def test_getattr(self):
366 self.assertEqual(re.match("(a)", "a").pos, 0)
367 self.assertEqual(re.match("(a)", "a").endpos, 1)
368 self.assertEqual(re.match("(a)", "a").string, "a")
369 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300370 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000371
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, with no flags, with
    # LOCALE, and (when unicode is available) with UNICODE.
    flag_sets = [0, re.LOCALE]
    if have_unicode:
        flag_sets.append(re.UNICODE)
    for flags in flag_sets:
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", flags).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", flags).group(1), "bx")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", flags).group(0), "1aa! a")

    # The same boundary checks against unicode subjects.
    self.assertEqual(re.search(r"\b(b.)\b",
                               u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               u"abc bcd bc abxd").group(1), "bx")

    # Under MULTILINE, ^ and $ match at line breaks but \A and \Z do not.
    for wrapped, bare in (("\nabc\n", "abc"), (u"\nabc\n", u"abc")):
        self.assertEqual(re.search(r"^abc$", wrapped, re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", bare, re.M).group(0), "abc")
        self.assertIsNone(re.search(r"^\Aabc\Z$", wrapped, re.M))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000403
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200404 def test_string_boundaries(self):
405 # See http://bugs.python.org/issue10713
406 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
407 "abc")
408 # There's a word boundary at the start of a string.
409 self.assertTrue(re.match(r"\b", "abc"))
410 # A non-empty string includes a non-boundary zero-length match.
411 self.assertTrue(re.search(r"\B", "abc"))
412 # There is no non-boundary match at the start of a string.
413 self.assertFalse(re.match(r"\B", "abc"))
414 # However, an empty string contains no word boundaries, and also no
415 # non-boundaries.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300416 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200417 # This one is questionable and different from the perlre behaviour,
418 # but describes current behavior.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300419 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200420 # A single word-character string has two boundaries, but no
421 # non-boundary gaps.
422 self.assertEqual(len(re.findall(r"\b", "a")), 2)
423 self.assertEqual(len(re.findall(r"\B", "a")), 0)
424 # If there are no words, there are no boundaries
425 self.assertEqual(len(re.findall(r"\b", " ")), 0)
426 self.assertEqual(len(re.findall(r"\b", " ")), 0)
427 # Can match around the whitespace.
428 self.assertEqual(len(re.findall(r"\B", " ")), 2)
429
@requires_unicode
def test_bigcharset(self):
    # Character sets holding code points above the 8-bit range, first a
    # two-member set and then one built from every 255th code point
    # between 256 and 2**16.
    member = unichr(0x2222)
    small_set = u(r"([\u2222\u2223])")
    self.assertEqual(re.match(small_set, member).group(1), member)
    self.assertEqual(re.match(small_set, member, re.UNICODE).group(1),
                     member)
    big_set = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
    target = unichr(0xff01)
    self.assertEqual(re.match(big_set, target, re.UNICODE).group(), target)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000438
Antoine Pitroub83ea142012-11-20 22:30:42 +0100439 def test_big_codesize(self):
440 # Issue #1160
441 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300442 self.assertTrue(r.match('1000'))
443 self.assertTrue(r.match('9999'))
Antoine Pitroub83ea142012-11-20 22:30:42 +0100444
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000445 def test_anyall(self):
446 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
447 "a\nb")
448 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
449 "a\n\nb")
450
451 def test_non_consuming(self):
452 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
453 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
454 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
455 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
456 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
457 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
458 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
459
460 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
461 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
462 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
463 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
464
465 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000466 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
467 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000468 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
469 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
470 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
471 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
472 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
473 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
474 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
475 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
476
def test_ignore_case_range(self):
    # Issues #3511, #17381.
    # Character ranges combined with IGNORECASE: (pattern, character)
    # pairs that must match, followed by pairs that must not.
    byte_hits = [
        (r'[9-a]', '_'),
        (r'[\xc0-\xde]', '\xd7'),
        (r'[\xe0-\xfe]', '\xf7'),
    ]
    byte_misses = [
        (r'[9-A]', '_'),
        (r'[\xc0-\xde]', '\xf7'),
        (r'[\xe0-\xfe]', '\xd7'),
    ]
    for pattern, char in byte_hits:
        self.assertTrue(re.match(pattern, char, re.I))
    for pattern, char in byte_misses:
        self.assertIsNone(re.match(pattern, char, re.I))

    if not have_unicode:
        return

    # Same idea for unicode strings under UNICODE | IGNORECASE; both
    # pattern and character are written as raw text and converted by u().
    unicode_flags = re.U | re.I
    unicode_hits = [
        (r'[9-a]', r'_'),
        (r'[\xc0-\xde]', r'\xd7'),
        (r'[\xe0-\xfe]', r'\xf7'),
        (r'[\u0430-\u045f]', r'\u0450'),
        (r'[\u0430-\u045f]', r'\u0400'),
        (r'[\u0400-\u042f]', r'\u0450'),
        (r'[\u0400-\u042f]', r'\u0400'),
    ]
    unicode_misses = [
        (r'[9-A]', r'_'),
        (r'[\xc0-\xde]', r'\xf7'),
        (r'[\xe0-\xfe]', r'\xd7'),
    ]
    if sys.maxunicode > 0xffff:
        # Code points beyond 0xffff are only checked when the build can
        # represent them.
        unicode_hits += [
            (r'[\U00010428-\U0001044f]', r'\U00010428'),
            (r'[\U00010428-\U0001044f]', r'\U00010400'),
            (r'[\U00010400-\U00010427]', r'\U00010428'),
            (r'[\U00010400-\U00010427]', r'\U00010400'),
        ]
    for pattern, char in unicode_hits:
        self.assertTrue(re.match(u(pattern), u(char), unicode_flags))
    for pattern, char in unicode_misses:
        self.assertIsNone(re.match(u(pattern), u(char), unicode_flags))
513
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000514 def test_category(self):
515 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
516
def test_getlower(self):
    # _sre.getlower() maps 'A' to 'a' with no flags, with LOCALE and, when
    # unicode is available, with UNICODE.
    import _sre
    flag_values = [0, re.LOCALE]
    if have_unicode:
        flag_values.append(re.UNICODE)
    for flags in flag_values:
        self.assertEqual(_sre.getlower(ord('A'), flags), ord('a'))

    # Case-insensitive matching against str and unicode subjects.
    for subject in ("ABC", u"ABC"):
        self.assertEqual(re.match("abc", subject, re.I).group(0), "ABC")
526
527 def test_not_literal(self):
528 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
529 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
530
531 def test_search_coverage(self):
532 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
533 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
534
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the matched
    # text and span equal `match` and `span`.  When both are omitted the
    # pattern is expected to match the whole of `text`; giving only one
    # of the two raises ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000548
@requires_unicode
def test_re_escape(self):
    # re.escape() on each of the first 256 unicode characters: ASCII
    # letters and digits stay as they are, NUL becomes "\000", and
    # everything else gets a backslash prefix.  Every escaped form must
    # match its original.
    untouched = unicode(string.ascii_letters + string.digits)
    all_chars = u''.join(map(unichr, range(256)))
    for char in all_chars:
        if char in untouched:
            expected = char
        elif char == u'\x00':
            expected = u'\\000'
        else:
            expected = u'\\' + char
        self.assertEqual(re.escape(char), expected)
        self.assertMatch(re.escape(char), char)
    self.assertMatch(re.escape(all_chars), all_chars)
562
def test_re_escape_byte(self):
    # re.escape() on each of the 256 single-character byte strings:
    # ASCII letters and digits stay as they are, NUL becomes "\000", and
    # everything else gets a backslash prefix.  Every escaped form must
    # match its original.
    untouched = string.ascii_letters + string.digits
    all_bytes = ''.join(map(chr, range(256)))
    for byte in all_bytes:
        if byte in untouched:
            expected = byte
        elif byte == b'\x00':
            expected = b'\\000'
        else:
            expected = b'\\' + byte
        self.assertEqual(re.escape(byte), expected)
        self.assertMatch(re.escape(byte), byte)
    self.assertMatch(re.escape(all_bytes), all_bytes)
575
@requires_unicode
def test_re_escape_non_ascii(self):
    # Escaping a unicode string that contains U+2620 three times: each
    # occurrence gets a backslash and the result still matches the input.
    original = u(r'xxx\u2620\u2620\u2620xxx')
    escaped = re.escape(original)
    self.assertEqual(escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx'))
    self.assertMatch(escaped, original)
    # The escaped character can be embedded in a larger pattern.
    repeated = u'.%s+.' % re.escape(unichr(0x2620))
    self.assertMatch(repeated, original,
                     match=u(r'x\u2620\u2620\u2620x'), span=(2, 7),
                     matcher=re.search)
Ezio Melotti46645632011-03-25 14:50:52 +0200584
def test_re_escape_non_ascii_bytes(self):
    # Escaping a byte string with non-ASCII bytes: each such byte gets a
    # backslash and the escaped form still matches the input.
    original = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
    escaped = re.escape(original)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, original)
    # The escaped three-byte sequence occurs twice in the input.
    occurrences = re.findall(re.escape(b'\xe2\x98\xa0'), original)
    self.assertEqual(len(occurrences), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000592
def test_pickling(self):
    # Exercise both pickle implementations; each module is imported only
    # right before it is handed to pickle_test().
    for module_name in ('pickle', 'cPickle'):
        self.pickle_test(__import__(module_name))
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    from sre import _compile
    # current pickle expects the _compile() reconstructor in re module
    from re import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000603
def pickle_test(self, pickle):
    """Round-trip a compiled pattern through every protocol of *pickle*.

    *pickle* is a module object providing dumps(), loads() and
    HIGHEST_PROTOCOL.  For each protocol from 0 up to and including
    HIGHEST_PROTOCOL the unpickled pattern must compare equal to the
    pattern that was pickled.
    """
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    protocol = 0
    while protocol <= pickle.HIGHEST_PROTOCOL:
        restored = pickle.loads(pickle.dumps(original, protocol))
        self.assertEqual(restored, original)
        protocol += 1
Guido van Rossum23b22571997-07-17 22:36:14 +0000610
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000611 def test_constants(self):
612 self.assertEqual(re.I, re.IGNORECASE)
613 self.assertEqual(re.L, re.LOCALE)
614 self.assertEqual(re.M, re.MULTILINE)
615 self.assertEqual(re.S, re.DOTALL)
616 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000617
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000618 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000619 for flag in [re.I, re.M, re.X, re.S, re.L]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300620 self.assertTrue(re.compile('^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +0000621
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000622 def test_sre_character_literals(self):
623 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300624 self.assertTrue(re.match(r"\%03o" % i, chr(i)))
625 self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
626 self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
627 self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
628 self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
629 self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000630 self.assertRaises(re.error, re.match, "\911", "")
631
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000632 def test_sre_character_class_literals(self):
633 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300634 self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
635 self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
636 self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
637 self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
638 self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
639 self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000640 self.assertRaises(re.error, re.match, "[\911]", "")
641
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000642 def test_bug_113254(self):
643 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
644 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
645 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
646
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000647 def test_bug_527371(self):
648 # bug described in patches 527371/672491
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300649 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000650 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
651 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
652 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
653 self.assertEqual(re.match("((a))", "a").lastindex, 1)
654
655 def test_bug_545855(self):
656 # bug 545855 -- This pattern failed to cause a compile error as it
657 # should, instead provoking a TypeError.
658 self.assertRaises(re.error, re.compile, 'foo[a-')
659
660 def test_bug_418626(self):
661 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
662 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
663 # pattern '*?' on a long string.
664 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
665 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
666 20003)
667 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000668 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000669 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000670 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000671
@requires_unicode
def test_bug_612074(self):
    # An escaped non-ASCII character must be usable inside a character
    # class: the resulting pattern has to compile to a truthy object.
    char_class = u"[" + re.escape(unichr(0x2039)) + u"]"
    compiled = re.compile(char_class)
    self.assertEqual(compiled and 1, 1)
676
Skip Montanaro1e703c62003-04-25 15:40:28 +0000677 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000678 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000679 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000680 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
681 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
682 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000683
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200684 def test_unlimited_zero_width_repeat(self):
685 # Issue #9669
686 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
687 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
688 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
689 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
690 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
691 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
692
Skip Montanaro1e703c62003-04-25 15:40:28 +0000693 def test_scanner(self):
694 def s_ident(scanner, token): return token
695 def s_operator(scanner, token): return "op%s" % token
696 def s_float(scanner, token): return float(token)
697 def s_int(scanner, token): return int(token)
698
699 scanner = Scanner([
700 (r"[a-zA-Z_]\w*", s_ident),
701 (r"\d+\.\d*", s_float),
702 (r"\d+", s_int),
703 (r"=|\+|-|\*|/", s_operator),
704 (r"\s+", None),
705 ])
706
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300707 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000708
Skip Montanaro1e703c62003-04-25 15:40:28 +0000709 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
710 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
711 'op+', 'bar'], ''))
712
Skip Montanaro5ba00542003-04-25 16:00:14 +0000713 def test_bug_448951(self):
714 # bug 448951 (similar to 429357, but with single char match)
715 # (Also test greedy matches.)
716 for op in '','?','*':
717 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
718 (None, None))
719 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
720 ('a:', 'a'))
721
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000722 def test_bug_725106(self):
723 # capturing groups in alternatives in repeats
724 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
725 ('b', 'a'))
726 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
727 ('c', 'b'))
728 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
729 ('b', None))
730 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
731 ('b', None))
732 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
733 ('b', 'a'))
734 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
735 ('c', 'b'))
736 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
737 ('b', None))
738 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
739 ('b', None))
740
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000741 def test_bug_725149(self):
742 # mark_stack_base restoring before restoring marks
743 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
744 ('a', None))
745 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
746 ('a', None, None))
747
@requires_unicode
def test_bug_764548(self):
    # bug 764548, re.compile() barfs on str/unicode subclasses
    class my_unicode(unicode):
        pass
    subclass_pattern = my_unicode("abc")
    compiled = re.compile(subclass_pattern)
    self.assertIsNone(compiled.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +0000754
Skip Montanaro5ba00542003-04-25 16:00:14 +0000755 def test_finditer(self):
756 iter = re.finditer(r":+", "a:b::c:::d")
757 self.assertEqual([item.group(0) for item in iter],
758 [":", "::", ":::"])
759
@requires_unicode
def test_bug_926075(self):
    # A byte-string pattern and the equal unicode pattern must not
    # compile to the very same object.
    from_bytes = re.compile('bug_926075')
    from_unicode = re.compile(u'bug_926075')
    self.assertIsNot(from_bytes, from_unicode)
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000764
@requires_unicode
def test_bug_931848(self):
    # A character class written with \uXXXX escapes (U+002E is the ASCII
    # full stop) must split a byte string on its dots.
    separators = re.compile(u(r"[\u002E\u3002\uFF0E\uFF61]"))
    pieces = separators.split("a.b.c")
    self.assertEqual(pieces, ['a', 'b', 'c'])
770
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000771 def test_bug_581080(self):
772 iter = re.finditer(r"\s", "a b")
773 self.assertEqual(iter.next().span(), (1,2))
774 self.assertRaises(StopIteration, iter.next)
775
776 scanner = re.compile(r"\s").scanner("a b")
777 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300778 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000779
780 def test_bug_817234(self):
781 iter = re.finditer(r".*", "asdf")
782 self.assertEqual(iter.next().span(), (0, 4))
783 self.assertEqual(iter.next().span(), (4, 4))
784 self.assertRaises(StopIteration, iter.next)
785
@requires_unicode
def test_bug_6561(self):
    # '\d' should match characters in Unicode category 'Nd'
    # (Number, Decimal Digit), but not those in 'Nl' (Number,
    # Letter) or 'No' (Number, Other).
    # Raw string so the backslash reaches the regex engine without
    # relying on "\d" being an unrecognized string-literal escape.
    single_digit = r'^\d$'

    decimal_digits = [
        unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
        unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
        unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
        ]
    for x in decimal_digits:
        self.assertEqual(re.match(single_digit, x, re.UNICODE).group(0), x)

    not_decimal_digits = [
        unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
        unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
        unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
        unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
        ]
    for x in not_decimal_digits:
        self.assertIsNone(re.match(single_digit, x, re.UNICODE))
807
def test_empty_array(self):
    # SF buf 1647541
    # Matching against an empty array of any typecode must behave like
    # matching against an empty string.
    import array
    typecodes = 'cbBhHiIlLfd' + ('u' if have_unicode else '')
    non_empty_pattern = re.compile("bla")
    empty_pattern = re.compile("")
    for typecode in typecodes:
        empty = array.array(typecode)
        self.assertIsNone(non_empty_pattern.match(empty))
        self.assertEqual(empty_pattern.match(empty).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000818
@requires_unicode
def test_inline_flags(self):
    # Bug #1700
    upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
    lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow

    # (pattern, compile flags, subject): the case-insensitive unicode
    # match must succeed whether the flags are passed as arguments,
    # given inline, or split between the two.
    cases = (
        (upper_char, re.I | re.U, lower_char),
        (lower_char, re.I | re.U, upper_char),
        ('(?i)' + upper_char, re.U, lower_char),
        ('(?i)' + lower_char, re.U, upper_char),
        ('(?iu)' + upper_char, 0, lower_char),
        ('(?iu)' + lower_char, 0, upper_char),
    )
    for pattern, flags, subject in cases:
        compiled = re.compile(pattern, flags)
        self.assertTrue(compiled.match(subject))
Guido van Rossumae04c332008-01-03 19:12:44 +0000848
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000849 def test_dollar_matches_twice(self):
850 "$ matches the end of string, and just before the terminating \n"
851 pattern = re.compile('$')
852 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
853 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
854 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
855
856 pattern = re.compile('$', re.MULTILINE)
857 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
858 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
859 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
860
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000861 def test_dealloc(self):
862 # issue 3299: check for segfault in debug build
863 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000864 # the overflow limit is different on wide and narrow builds and it
865 # depends on the definition of SRE_CODE (see sre.h).
866 # 2**128 should be big enough to overflow on both. For smaller values
867 # a RuntimeError is raised instead of OverflowError.
868 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000869 self.assertRaises(TypeError, re.finditer, "a", {})
870 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000871
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200872 def test_compile(self):
873 # Test return value when given string and pattern as parameter
874 pattern = re.compile('random pattern')
875 self.assertIsInstance(pattern, re._pattern_type)
876 same_pattern = re.compile(pattern)
877 self.assertIsInstance(same_pattern, re._pattern_type)
878 self.assertIs(same_pattern, pattern)
879 # Test behaviour when not given a string or pattern as parameter
880 self.assertRaises(TypeError, re.compile, 0)
881
Ezio Melotti5c4e32b2013-01-11 08:32:01 +0200882 def test_bug_13899(self):
883 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
884 # nothing. Ditto B and Z.
885 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
886 ['A', 'B', '\b', 'C', 'Z'])
887
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # The end-of-string anchor must be found at offset *size*.
    haystack = size * 'a'
    at_end = re.search('$', haystack)
    self.assertIsNotNone(at_end)
    self.assertEqual(at_end.start(), size)
    self.assertEqual(at_end.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100896
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # Replacing the empty pattern with the empty string leaves the text
    # unchanged and reports size + 1 substitutions.
    haystack = size * 'a'
    replaced, count = re.subn('', '', haystack)
    self.assertEqual(replaced, haystack)
    self.assertEqual(count, size + 1)
906
907
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200908 def test_repeat_minmax_overflow(self):
909 # Issue #13169
910 string = "x" * 100000
911 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
912 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
913 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
914 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
915 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
916 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
917 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
918 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
919 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
920 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
921 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
922
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    subject = "x" * 100000
    largest_valid = MAXREPEAT - 1
    # The largest accepted count compiles; only the upper-bound form can
    # match the 100000-character subject.
    self.assertIsNone(re.match(r".{%d}" % largest_valid, subject))
    bounded = re.match(r".{,%d}" % largest_valid, subject)
    self.assertEqual(bounded.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest_valid, subject))
    # MAXREPEAT itself is rejected in every position.
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
937
R David Murray60773392013-04-14 13:08:50 -0400938 def test_backref_group_name_in_exception(self):
939 # Issue 17341: Poor error message when compiling invalid regex
940 with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
941 re.compile('(?P=<foo>)')
942
943 def test_group_name_in_exception(self):
944 # Issue 17341: Poor error message when compiling invalid regex
945 with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
946 re.compile('(?P<?foo>)')
947
def test_issue17998(self):
    # Every quantifier on '.', greedy or lazy, followed by the literal
    # 'yz' must find 'xyz' as one match -- for byte strings and, when
    # available, for unicode strings.
    for quantifier in ('*', '+', '?', '{1}'):
        for laziness in ('', '?'):
            pattern = '.%s%syz' % (quantifier, laziness)
            found = re.compile(pattern, re.S).findall('xyz')
            self.assertEqual(found, ['xyz'], msg=pattern)
            if have_unicode:
                pattern = unicode(pattern)
                found = re.compile(pattern, re.S).findall(u'xyz')
                self.assertEqual(found, [u'xyz'], msg=pattern)
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300958
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200959
Serhiy Storchaka83737c62013-08-19 23:20:07 +0300960 def test_bug_2537(self):
961 # issue 2537: empty submatches
962 for outer_op in ('{0,}', '*', '+', '{1,187}'):
963 for inner_op in ('{0,}', '*', '?'):
964 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
965 m = r.match("xyyzy")
966 self.assertEqual(m.group(0), "xyy")
967 self.assertEqual(m.group(1), "")
968 self.assertEqual(m.group(2), "y")
969
def test_debug_flag(self):
    # NOTE(review): the expected dump below assumes two spaces of
    # indentation per nesting level -- confirm against real re.DEBUG output.
    pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
    dump = '''\
subpattern 1
  literal 46
subpattern None
  branch
    in
      literal 99
      literal 104
  or
    literal 112
    literal 121
subpattern None
  groupref_exists 1
    at at_end
  else
    literal 58
    literal 32
'''
    # Debug output is output again even a second time (bypassing
    # the cache -- issue #20426).
    for _attempt in range(2):
        with captured_stdout() as out:
            re.compile(pat, re.DEBUG)
        self.assertEqual(out.getvalue(), dump)
Antoine Pitrouf5814112014-02-03 20:59:59 +0100998
Serhiy Storchakae50fe4c2014-03-06 12:24:29 +0200999 def test_keyword_parameters(self):
1000 # Issue #20283: Accepting the string keyword parameter.
1001 pat = re.compile(r'(ab)')
1002 self.assertEqual(
1003 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1004 self.assertEqual(
1005 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1006 self.assertEqual(
1007 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1008 self.assertEqual(
1009 pat.split(string='abracadabra', maxsplit=1),
1010 ['', 'ab', 'racadabra'])
1011
Benjamin Petersonbc4ece52014-09-30 22:04:28 -04001012 def test_match_group_takes_long(self):
1013 self.assertEqual(re.match("(foo)", "foo").group(1L), "foo")
1014 self.assertRaises(IndexError, re.match("", "").group, sys.maxint + 1)
1015
def test_locale_caching(self):
    # Issue #22410
    # Remember the current LC_CTYPE setting and restore it on cleanup.
    saved_locale = locale.setlocale(locale.LC_CTYPE)
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved_locale)
    for needed in ('en_US.iso88591', 'en_US.utf8'):
        try:
            locale.setlocale(locale.LC_CTYPE, needed)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % needed)

    # Run the two locale checks in both orders, each time starting from
    # an emptied pattern cache.
    orderings = (
        (self.check_en_US_iso88591, self.check_en_US_utf8),
        (self.check_en_US_utf8, self.check_en_US_iso88591),
    )
    for first_check, second_check in orderings:
        re.purge()
        first_check()
        second_check()
1033
def check_en_US_iso88591(self):
    """Check LOCALE|IGNORECASE matching under the en_US.iso88591 locale.

    Switches LC_CTYPE to 'en_US.iso88591' and asserts that the bytes
    0xc5 and 0xe5 match each other case-insensitively, with the flags
    passed as arguments and with the inline (?Li) form.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    flags = re.L | re.I
    for pattern, subject in ((b'\xc5\xe5', b'\xc5\xe5'),
                             (b'\xc5', b'\xe5'),
                             (b'\xe5', b'\xc5')):
        self.assertTrue(re.match(pattern, subject, flags))
    for pattern, subject in ((b'(?Li)\xc5\xe5', b'\xc5\xe5'),
                             (b'(?Li)\xc5', b'\xe5'),
                             (b'(?Li)\xe5', b'\xc5')):
        self.assertTrue(re.match(pattern, subject))
1042
def check_en_US_utf8(self):
    """Check LOCALE|IGNORECASE matching under the en_US.utf8 locale.

    Switches LC_CTYPE to 'en_US.utf8' and asserts that the two-byte
    sequence still matches itself while the single bytes 0xc5 and 0xe5
    do not match each other, with the flags passed as arguments and with
    the inline (?Li) form.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    flags = re.L | re.I
    self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', flags))
    for pattern, subject in ((b'\xc5', b'\xe5'), (b'\xe5', b'\xc5')):
        self.assertIsNone(re.match(pattern, subject, flags))
    self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
    for pattern, subject in ((b'(?Li)\xc5', b'\xe5'),
                             (b'(?Li)\xe5', b'\xc5')):
        self.assertIsNone(re.match(pattern, subject))
1051
Antoine Pitrouf5814112014-02-03 20:59:59 +01001052
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001053def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +00001054 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001055 if verbose:
1056 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001057 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001058 # To save time, only run the first and last 10 tests
1059 #tests = tests[:10] + tests[-10:]
1060 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001061
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001062 for t in tests:
1063 sys.stdout.flush()
1064 pattern = s = outcome = repl = expected = None
1065 if len(t) == 5:
1066 pattern, s, outcome, repl, expected = t
1067 elif len(t) == 3:
1068 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001069 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001070 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
1071
Guido van Rossum41360a41998-03-26 19:42:58 +00001072 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001073 obj = re.compile(pattern)
1074 except re.error:
1075 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +00001076 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001077 print '=== Syntax error:', t
1078 except KeyboardInterrupt: raise KeyboardInterrupt
1079 except:
1080 print '*** Unexpected error ***', t
1081 if verbose:
1082 traceback.print_exc(file=sys.stdout)
1083 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +00001084 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001085 result = obj.search(s)
1086 except re.error, msg:
1087 print '=== Unexpected exception', t, repr(msg)
1088 if outcome == SYNTAX_ERROR:
1089 # This should have been a syntax error; forget it.
1090 pass
1091 elif outcome == FAIL:
1092 if result is None: pass # No match, as expected
1093 else: print '=== Succeeded incorrectly', t
1094 elif outcome == SUCCEED:
1095 if result is not None:
1096 # Matched, as expected, so now we compute the
1097 # result string and compare it to our expected result.
1098 start, end = result.span(0)
1099 vardict={'found': result.group(0),
1100 'groups': result.group(),
1101 'flags': result.re.flags}
1102 for i in range(1, 100):
1103 try:
1104 gi = result.group(i)
1105 # Special hack because else the string concat fails:
1106 if gi is None:
1107 gi = "None"
1108 except IndexError:
1109 gi = "Error"
1110 vardict['g%d' % i] = gi
1111 for i in result.re.groupindex.keys():
1112 try:
1113 gi = result.group(i)
1114 if gi is None:
1115 gi = "None"
1116 except IndexError:
1117 gi = "Error"
1118 vardict[i] = gi
1119 repl = eval(repl, vardict)
1120 if repl != expected:
1121 print '=== grouping error', t,
1122 print repr(repl) + ' should be ' + repr(expected)
1123 else:
1124 print '=== Failed incorrectly', t
1125
1126 # Try the match on a unicode string, and check that it
1127 # still succeeds.
1128 try:
1129 result = obj.search(unicode(s, "latin-1"))
1130 if result is None:
1131 print '=== Fails on unicode match', t
1132 except NameError:
1133 continue # 1.5.2
1134 except TypeError:
1135 continue # unicode test case
1136
1137 # Try the match on a unicode pattern, and check that it
1138 # still succeeds.
1139 obj=re.compile(unicode(pattern, "latin-1"))
1140 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +00001141 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001142 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001143
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001144 # Try the match with the search area limited to the extent
1145 # of the match and see if it still succeeds. \B will
1146 # break (because it won't match at the end or start of a
1147 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001148
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001149 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1150 and result is not None:
1151 obj = re.compile(pattern)
1152 result = obj.search(s, result.start(0), result.end(0) + 1)
1153 if result is None:
1154 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001155
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001156 # Try the match with IGNORECASE enabled, and check that it
1157 # still succeeds.
1158 obj = re.compile(pattern, re.IGNORECASE)
1159 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001160 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001161 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001162
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001163 # Try the match with LOCALE enabled, and check that it
1164 # still succeeds.
1165 obj = re.compile(pattern, re.LOCALE)
1166 result = obj.search(s)
1167 if result is None:
1168 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001169
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001170 # Try the match with UNICODE locale enabled, and check
1171 # that it still succeeds.
1172 obj = re.compile(pattern, re.UNICODE)
1173 result = obj.search(s)
1174 if result is None:
1175 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001176
def test_main():
    """Run the ReTests unittest class, then the table-driven re_tests."""
    # Unittest-based cases first.
    run_unittest(ReTests)
    # Then the cases that report their problems by printing.
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001180
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()