blob: d2cc0c3183f094771995b09bf1a5b02d46185c69 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Serhiy Storchaka7644ff12014-09-14 17:40:44 +03003from test.test_support import captured_stdout, have_unicode, requires_unicode, u
Serhiy Storchakad4c72902014-10-31 00:53:19 +02004import locale
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00006from re import Scanner
R David Murray60773392013-04-14 13:08:50 -04007import sre_constants
Ezio Melotti46645632011-03-25 14:50:52 +02008import sys
9import string
10import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +000011from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000012
Antoine Pitrou735f36e2012-12-03 20:53:12 +010013
Guido van Rossum23b22571997-07-17 22:36:14 +000014# Misc tests from Tim Peters' re.doc
15
Just van Rossum6802c6e2003-07-02 14:36:59 +000016# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020017# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000018# cover most of the code.
19
Skip Montanaro8ed06da2003-04-24 19:43:18 +000020import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000021
Skip Montanaro8ed06da2003-04-24 19:43:18 +000022class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000023
24 def test_weakref(self):
25 s = 'QabbbcR'
26 x = re.compile('ab+c')
27 y = proxy(x)
28 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
29
Skip Montanaro8ed06da2003-04-24 19:43:18 +000030 def test_search_star_plus(self):
31 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
32 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
33 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
34 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030035 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000036 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
37 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
38 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
39 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030040 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000043 int_value = int(matchobj.group(0))
44 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000045
    def test_basic_re_sub(self):
        # Exercises re.sub() with string templates and with callables.
        # The inline (?i) flag makes the pattern case-insensitive.
        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        # A callable replacement receives each match object; the fourth
        # positional argument limits the number of substitutions.
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                         '9.3 -3 23x99y')

        # The value returned by a callable is inserted verbatim, whereas a
        # string template has its backslash escapes processed.
        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        # Group references are expanded in a plain template, but not once
        # the template went through re.escape() or comes from a callable.
        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        # \g<name> and \g<number> can both be used for the same group.
        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')

        # Control-character escapes in the template are translated; the
        # other escapes listed are expected to keep their backslash.
        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

        # A pattern that matches the empty string at the start of the text.
        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000073
Skip Montanaro2726fcd2003-04-25 14:31:54 +000074 def test_bug_449964(self):
75 # fails for group followed by other escape
76 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
77 'xx\bxx\b')
78
79 def test_bug_449000(self):
80 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000081 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
85 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
86 'abc\ndef\n')
87 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
88 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000089
Serhiy Storchaka7644ff12014-09-14 17:40:44 +030090 @requires_unicode
Guido van Rossum1ff91d92007-09-10 22:02:25 +000091 def test_bug_1140(self):
92 # re.sub(x, y, u'') should return u'', not '', and
93 # re.sub(x, y, '') should return '', not u''.
94 # Also:
95 # re.sub(x, y, unicode(x)) should return unicode(y), and
96 # re.sub(x, y, str(x)) should return
97 # str(y) if isinstance(y, str) else unicode(y).
98 for x in 'x', u'x':
99 for y in 'y', u'y':
100 z = re.sub(x, y, u'')
101 self.assertEqual(z, u'')
102 self.assertEqual(type(z), unicode)
103 #
104 z = re.sub(x, y, '')
105 self.assertEqual(z, '')
106 self.assertEqual(type(z), str)
107 #
108 z = re.sub(x, y, unicode(x))
109 self.assertEqual(z, y)
110 self.assertEqual(type(z), unicode)
111 #
112 z = re.sub(x, y, str(x))
113 self.assertEqual(z, y)
114 self.assertEqual(type(z), type(y))
115
Raymond Hettinger80016c92007-12-19 18:13:31 +0000116 def test_bug_1661(self):
117 # Verify that flags do not get silently ignored with compiled patterns
118 pattern = re.compile('.')
119 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
120 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
121 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
122 self.assertRaises(ValueError, re.compile, pattern, re.I)
123
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000124 def test_bug_3629(self):
125 # A regex that triggered a bug in the sre-code validator
126 re.compile("(?P<quote>)(?(quote))")
127
    def test_sub_template_numeric_escape(self):
        # bug 776311 and friends
        # Numeric escapes in a replacement template are either octal
        # character codes or group references.  The pattern 'x' has no
        # groups, so anything read as a group reference must fail.

        # A leading zero, or three octal digits, denotes a character code;
        # digits that cannot continue the escape are kept as literal text.
        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')

        # At most three digits belong to the escape.
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

        # Octal values above \377 are expected to keep only their low 8 bits.
        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')

        # Everything below is read as a reference to a group that does not
        # exist in the pattern.
        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'

        # in python2.3 (etc), these loop endlessly in sre_parser.py
        # With eleven groups in the pattern, \11 is a valid group reference.
        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                         'xz8')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                         'xza')
169
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000170 def test_qualified_re_sub(self):
171 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
172 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000173
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000174 def test_bug_114660(self):
175 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
176 'hello there')
177
    def test_bug_462270(self):
        # Test for empty sub() behaviour, see SF bug #462270
        # 'x*' can match the empty string, so a replacement is also made at
        # positions holding no 'x'; 'x+' only replaces the real run.
        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
182
Ezio Melottief317382012-11-03 20:31:12 +0200183 def test_symbolic_groups(self):
184 re.compile('(?P<a>x)(?P=a)(?(a)y)')
185 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
186 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
187 self.assertRaises(re.error, re.compile, '(?Px)')
188 self.assertRaises(re.error, re.compile, '(?P=)')
189 self.assertRaises(re.error, re.compile, '(?P=1)')
190 self.assertRaises(re.error, re.compile, '(?P=a)')
191 self.assertRaises(re.error, re.compile, '(?P=a1)')
192 self.assertRaises(re.error, re.compile, '(?P=a.)')
193 self.assertRaises(re.error, re.compile, '(?P<)')
194 self.assertRaises(re.error, re.compile, '(?P<>)')
195 self.assertRaises(re.error, re.compile, '(?P<1>)')
196 self.assertRaises(re.error, re.compile, '(?P<a.>)')
197 self.assertRaises(re.error, re.compile, '(?())')
198 self.assertRaises(re.error, re.compile, '(?(a))')
199 self.assertRaises(re.error, re.compile, '(?(1a))')
200 self.assertRaises(re.error, re.compile, '(?(a.))')
201
    def test_symbolic_refs(self):
        # Malformed or unresolvable group references in a replacement
        # template must make re.sub() raise.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
        # A syntactically valid but unknown name is expected to raise
        # IndexError rather than re.error.
        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
        # Group b / group 2 exists in the pattern but cannot take part in
        # any match of 'xx'.
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000213
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000214 def test_re_subn(self):
215 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
216 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
217 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
218 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
219 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000220
    def test_re_split(self):
        # re.split() with plain, capturing and non-capturing separators.
        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
        # Text matched by a capturing group is kept in the result list.
        self.assertEqual(re.split("(:*)", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', '::', 'c'])
        # A non-capturing group behaves like the bare pattern.
        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
        # A repeated group only reports its last repetition.
        self.assertEqual(re.split("(:)*", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        # With several groups, those not taking part in a match yield None.
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000236
    def test_qualified_re_split(self):
        # The third positional argument limits the number of splits; the
        # unsplit remainder is returned as the last list element.
        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
        # Captured separators are still included when a limit is given.
        self.assertEqual(re.split("(:)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000244
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000245 def test_re_findall(self):
246 self.assertEqual(re.findall(":+", "abc"), [])
247 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
248 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
249 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
250 (":", ":"),
251 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000252
Skip Montanaro5ba00542003-04-25 16:00:14 +0000253 def test_bug_117612(self):
254 self.assertEqual(re.findall(r"(a|(b))", "aba"),
255 [("a", ""),("b", "b"),("a", "")])
256
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000257 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000258 self.assertEqual(re.match('a', 'a').groups(), ())
259 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
260 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
261 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
262 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000263
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000264 pat = re.compile('((a)|(b))(c)?')
265 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
266 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
267 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
268 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
269 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000270
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000271 # A single group
272 m = re.match('(a)', 'a')
273 self.assertEqual(m.group(0), 'a')
274 self.assertEqual(m.group(0), 'a')
275 self.assertEqual(m.group(1), 'a')
276 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000277
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000278 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
279 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
280 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
281 (None, 'b', None))
282 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000283
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000284 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000285 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
286 ('(', 'a'))
287 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
288 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300289 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
290 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000291 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
292 ('a', 'b'))
293 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
294 (None, 'd'))
295 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
296 (None, 'd'))
297 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
298 ('a', ''))
299
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000300 # Tests for bug #1177831: exercise groups other than the first group
301 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
302 self.assertEqual(p.match('abc').groups(),
303 ('a', 'b', 'c'))
304 self.assertEqual(p.match('ad').groups(),
305 ('a', None, 'd'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300306 self.assertIsNone(p.match('abd'))
307 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000308
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000309
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000310 def test_re_groupref(self):
311 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
312 ('|', 'a'))
313 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
314 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300315 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
316 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000317 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
318 ('a', 'a'))
319 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
320 (None, None))
321
322 def test_groupdict(self):
323 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
324 'first second').groupdict(),
325 {'first':'first', 'second':'second'})
326
327 def test_expand(self):
328 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
329 "first second")
330 .expand(r"\2 \1 \g<second> \g<first>"),
331 "second first second first")
332
333 def test_repeat_minmax(self):
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300334 self.assertIsNone(re.match("^(\w){1}$", "abc"))
335 self.assertIsNone(re.match("^(\w){1}?$", "abc"))
336 self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
337 self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000338
339 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
344 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
345 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
346 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
347
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300348 self.assertIsNone(re.match("^x{1}$", "xxx"))
349 self.assertIsNone(re.match("^x{1}?$", "xxx"))
350 self.assertIsNone(re.match("^x{1,2}$", "xxx"))
351 self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000352
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300353 self.assertTrue(re.match("^x{3}$", "xxx"))
354 self.assertTrue(re.match("^x{1,3}$", "xxx"))
355 self.assertTrue(re.match("^x{1,4}$", "xxx"))
356 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
357 self.assertTrue(re.match("^x{3}?$", "xxx"))
358 self.assertTrue(re.match("^x{1,3}?$", "xxx"))
359 self.assertTrue(re.match("^x{1,4}?$", "xxx"))
360 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000361
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300362 self.assertIsNone(re.match("^x{}$", "xxx"))
363 self.assertTrue(re.match("^x{}$", "x{}"))
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000364
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000365 def test_getattr(self):
366 self.assertEqual(re.match("(a)", "a").pos, 0)
367 self.assertEqual(re.match("(a)", "a").endpos, 1)
368 self.assertEqual(re.match("(a)", "a").string, "a")
369 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300370 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000371
    def test_special_escapes(self):
        # \b, \B, \A, \Z and the character-class escapes, checked with no
        # flag, with LOCALE and (when available) with UNICODE.
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
        if have_unicode:
            self.assertEqual(re.search(r"\b(b.)\b",
                                       "abcd abc bcd bx", re.UNICODE).group(1), "bx")
            self.assertEqual(re.search(r"\B(b.)\B",
                                       "abc bcd bc abxd", re.UNICODE).group(1), "bx")
        # With re.M, ^ and $ match at line boundaries, but \A and \Z still
        # only match at the very start and end of the string.
        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
        self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
        # Same checks with unicode subject strings.
        self.assertEqual(re.search(r"\b(b.)\b",
                                   u"abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   u"abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
        self.assertIsNone(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M))
        # One character for each of \d \D \w \W \s \S, in that order.
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a").group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
        if have_unicode:
            self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                       "1aa! a", re.UNICODE).group(0), "1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000403
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200404 def test_string_boundaries(self):
405 # See http://bugs.python.org/issue10713
406 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
407 "abc")
408 # There's a word boundary at the start of a string.
409 self.assertTrue(re.match(r"\b", "abc"))
410 # A non-empty string includes a non-boundary zero-length match.
411 self.assertTrue(re.search(r"\B", "abc"))
412 # There is no non-boundary match at the start of a string.
413 self.assertFalse(re.match(r"\B", "abc"))
414 # However, an empty string contains no word boundaries, and also no
415 # non-boundaries.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300416 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200417 # This one is questionable and different from the perlre behaviour,
418 # but describes current behavior.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300419 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200420 # A single word-character string has two boundaries, but no
421 # non-boundary gaps.
422 self.assertEqual(len(re.findall(r"\b", "a")), 2)
423 self.assertEqual(len(re.findall(r"\B", "a")), 0)
424 # If there are no words, there are no boundaries
425 self.assertEqual(len(re.findall(r"\b", " ")), 0)
426 self.assertEqual(len(re.findall(r"\b", " ")), 0)
427 # Can match around the whitespace.
428 self.assertEqual(len(re.findall(r"\B", " ")), 2)
429
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300430 @requires_unicode
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000431 def test_bigcharset(self):
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300432 self.assertEqual(re.match(u(r"([\u2222\u2223])"),
433 unichr(0x2222)).group(1), unichr(0x2222))
434 self.assertEqual(re.match(u(r"([\u2222\u2223])"),
435 unichr(0x2222), re.UNICODE).group(1), unichr(0x2222))
Serhiy Storchaka22fb0de2013-10-24 22:02:42 +0300436 r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300437 self.assertEqual(re.match(r, unichr(0xff01), re.UNICODE).group(), unichr(0xff01))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000438
Antoine Pitroub83ea142012-11-20 22:30:42 +0100439 def test_big_codesize(self):
440 # Issue #1160
441 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300442 self.assertTrue(r.match('1000'))
443 self.assertTrue(r.match('9999'))
Antoine Pitroub83ea142012-11-20 22:30:42 +0100444
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000445 def test_anyall(self):
446 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
447 "a\nb")
448 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
449 "a\n\nb")
450
Serhiy Storchaka15ea8702014-11-07 21:43:45 +0200451 def test_lookahead(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000452 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
453 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
454 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
455 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
456 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
457 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
458 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
459
460 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
461 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
462 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
463 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
464
Serhiy Storchaka15ea8702014-11-07 21:43:45 +0200465 # Group reference.
466 self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
467 self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
468 # Conditional group reference.
469 self.assertTrue(re.match('(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
470 self.assertIsNone(re.match('(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
471 self.assertTrue(re.match('(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
472 self.assertIsNone(re.match('(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
473 self.assertTrue(re.match('(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
474 # Group used before defined.
475 self.assertTrue(re.match('(a)b(?=(?(2)x|c))(c)', 'abc'))
476 self.assertIsNone(re.match('(a)b(?=(?(2)b|x))(c)', 'abc'))
477 self.assertTrue(re.match('(a)b(?=(?(1)c|x))(c)', 'abc'))
478
    def test_lookbehind(self):
        # Positive (?<=...) and negative (?<!...) lookbehind assertions.
        # The assertion inspects the text just before the current position,
        # which is 'b' in every 'abc' case below.
        self.assertTrue(re.match('ab(?<=b)c', 'abc'))
        self.assertIsNone(re.match('ab(?<=c)c', 'abc'))
        self.assertIsNone(re.match('ab(?<!b)c', 'abc'))
        self.assertTrue(re.match('ab(?<!c)c', 'abc'))
        # Group reference.
        self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
        self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
        self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
        self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
        # Conditional group reference.
        self.assertIsNone(re.match('(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
        self.assertIsNone(re.match('(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
        self.assertTrue(re.match('(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
        self.assertIsNone(re.match('(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
        self.assertTrue(re.match('(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
        # Group used before defined.
        self.assertIsNone(re.match('(a)b(?<=(?(2)x|c))(c)', 'abc'))
        self.assertIsNone(re.match('(a)b(?<=(?(2)b|x))(c)', 'abc'))
        self.assertIsNone(re.match('(a)b(?<=(?(1)c|x))(c)', 'abc'))
        self.assertTrue(re.match('(a)b(?<=(?(1)b|x))(c)', 'abc'))
500
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000501 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000502 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
503 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000504 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
505 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
506 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
507 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
508 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
509 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
510 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
511 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
512
    def test_ignore_case_range(self):
        # Issues #3511, #17381.
        # Character ranges under IGNORECASE.  '_' sorts between 'Z' and
        # 'a', so it is inside [9-a] but outside [9-A].
        self.assertTrue(re.match(r'[9-a]', '_', re.I))
        self.assertIsNone(re.match(r'[9-A]', '_', re.I))
        # \xd7 and \xf7 lie inside the two 8-bit ranges below; each is
        # expected to match only the range that literally contains it.
        self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
        self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
        self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7',re.I))
        self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
        if have_unicode:
            # The same checks with unicode strings and the UNICODE flag.
            self.assertTrue(re.match(u(r'[9-a]'), u(r'_'), re.U | re.I))
            self.assertIsNone(re.match(u(r'[9-A]'), u(r'_'), re.U | re.I))
            self.assertTrue(re.match(u(r'[\xc0-\xde]'),
                                     u(r'\xd7'), re.U | re.I))
            self.assertIsNone(re.match(u(r'[\xc0-\xde]'),
                                       u(r'\xf7'), re.U | re.I))
            self.assertTrue(re.match(u(r'[\xe0-\xfe]'),
                                     u(r'\xf7'), re.U | re.I))
            self.assertIsNone(re.match(u(r'[\xe0-\xfe]'),
                                       u(r'\xd7'), re.U | re.I))
            # Two BMP ranges, each expected to match a character from
            # itself and from the other range.
            self.assertTrue(re.match(u(r'[\u0430-\u045f]'),
                                     u(r'\u0450'), re.U | re.I))
            self.assertTrue(re.match(u(r'[\u0430-\u045f]'),
                                     u(r'\u0400'), re.U | re.I))
            self.assertTrue(re.match(u(r'[\u0400-\u042f]'),
                                     u(r'\u0450'), re.U | re.I))
            self.assertTrue(re.match(u(r'[\u0400-\u042f]'),
                                     u(r'\u0400'), re.U | re.I))
            # Code points above 0xffff are only checked when sys.maxunicode
            # shows that the interpreter can represent them.
            if sys.maxunicode > 0xffff:
                self.assertTrue(re.match(u(r'[\U00010428-\U0001044f]'),
                                         u(r'\U00010428'), re.U | re.I))
                self.assertTrue(re.match(u(r'[\U00010428-\U0001044f]'),
                                         u(r'\U00010400'), re.U | re.I))
                self.assertTrue(re.match(u(r'[\U00010400-\U00010427]'),
                                         u(r'\U00010428'), re.U | re.I))
                self.assertTrue(re.match(u(r'[\U00010400-\U00010427]'),
                                         u(r'\U00010400'), re.U | re.I))
549
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000550 def test_category(self):
551 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
552
    def test_getlower(self):
        # _sre.getlower(code, flags) is expected to map ord('A') to
        # ord('a') with no flag, with LOCALE and (when unicode support is
        # available) with UNICODE.
        import _sre
        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
        if have_unicode:
            self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

        # Case-insensitive matching returns the subject text unchanged.
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
562
563 def test_not_literal(self):
564 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
565 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
566
567 def test_search_coverage(self):
568 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
569 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
570
Ezio Melotti46645632011-03-25 14:50:52 +0200571 def assertMatch(self, pattern, text, match=None, span=None,
572 matcher=re.match):
573 if match is None and span is None:
574 # the pattern matches the whole text
575 match = text
576 span = (0, len(text))
577 elif match is None or span is None:
578 raise ValueError('If match is not None, span should be specified '
579 '(and vice versa).')
580 m = matcher(pattern, text)
581 self.assertTrue(m)
582 self.assertEqual(m.group(), match)
583 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000584
    @requires_unicode
    def test_re_escape(self):
        # re.escape() on every unicode character in range(256):
        # ASCII letters and digits are returned unchanged, NUL becomes the
        # four characters \000, and anything else gets a backslash prefix.
        alnum_chars = unicode(string.ascii_letters + string.digits)
        p = u''.join(unichr(i) for i in range(256))
        for c in p:
            if c in alnum_chars:
                self.assertEqual(re.escape(c), c)
            elif c == u'\x00':
                self.assertEqual(re.escape(c), u'\\000')
            else:
                self.assertEqual(re.escape(c), u'\\' + c)
            # The escaped form must match the original character...
            self.assertMatch(re.escape(c), c)
        # ...and the escaped whole string must match the whole string.
        self.assertMatch(re.escape(p), p)
598
599 def test_re_escape_byte(self):
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300600 alnum_chars = string.ascii_letters + string.digits
Ezio Melotti46645632011-03-25 14:50:52 +0200601 p = ''.join(chr(i) for i in range(256))
602 for b in p:
603 if b in alnum_chars:
604 self.assertEqual(re.escape(b), b)
605 elif b == b'\x00':
606 self.assertEqual(re.escape(b), b'\\000')
607 else:
608 self.assertEqual(re.escape(b), b'\\' + b)
609 self.assertMatch(re.escape(b), b)
610 self.assertMatch(re.escape(p), p)
611
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300612 @requires_unicode
Ezio Melotti46645632011-03-25 14:50:52 +0200613 def test_re_escape_non_ascii(self):
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300614 s = u(r'xxx\u2620\u2620\u2620xxx')
Ezio Melotti46645632011-03-25 14:50:52 +0200615 s_escaped = re.escape(s)
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300616 self.assertEqual(s_escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx'))
Ezio Melotti46645632011-03-25 14:50:52 +0200617 self.assertMatch(s_escaped, s)
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300618 self.assertMatch(u'.%s+.' % re.escape(unichr(0x2620)), s,
619 u(r'x\u2620\u2620\u2620x'), (2, 7), re.search)
Ezio Melotti46645632011-03-25 14:50:52 +0200620
621 def test_re_escape_non_ascii_bytes(self):
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300622 b = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
Ezio Melotti46645632011-03-25 14:50:52 +0200623 b_escaped = re.escape(b)
624 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
625 self.assertMatch(b_escaped, b)
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300626 res = re.findall(re.escape(b'\xe2\x98\xa0'), b)
Ezio Melotti46645632011-03-25 14:50:52 +0200627 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000628
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000629 def test_pickling(self):
630 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000631 self.pickle_test(pickle)
632 import cPickle
633 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000634 # old pickles expect the _compile() reconstructor in sre module
Florent Xicluna6257a7b2010-03-31 22:01:03 +0000635 import_module("sre", deprecated=True)
636 from sre import _compile
Serhiy Storchaka038fac62014-09-15 11:35:06 +0300637 # current pickle expects the _compile() reconstructor in re module
638 from re import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000639
640 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000641 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
Serhiy Storchaka038fac62014-09-15 11:35:06 +0300642 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
643 pickled = pickle.dumps(oldpat, proto)
644 newpat = pickle.loads(pickled)
645 self.assertEqual(newpat, oldpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000646
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000647 def test_constants(self):
648 self.assertEqual(re.I, re.IGNORECASE)
649 self.assertEqual(re.L, re.LOCALE)
650 self.assertEqual(re.M, re.MULTILINE)
651 self.assertEqual(re.S, re.DOTALL)
652 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000653
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000654 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000655 for flag in [re.I, re.M, re.X, re.S, re.L]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300656 self.assertTrue(re.compile('^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +0000657
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000658 def test_sre_character_literals(self):
659 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300660 self.assertTrue(re.match(r"\%03o" % i, chr(i)))
661 self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
662 self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
663 self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
664 self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
665 self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000666 self.assertRaises(re.error, re.match, "\911", "")
667
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000668 def test_sre_character_class_literals(self):
669 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300670 self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
671 self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
672 self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
673 self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
674 self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
675 self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000676 self.assertRaises(re.error, re.match, "[\911]", "")
677
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000678 def test_bug_113254(self):
679 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
680 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
681 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
682
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000683 def test_bug_527371(self):
684 # bug described in patches 527371/672491
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300685 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000686 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
687 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
688 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
689 self.assertEqual(re.match("((a))", "a").lastindex, 1)
690
691 def test_bug_545855(self):
692 # bug 545855 -- This pattern failed to cause a compile error as it
693 # should, instead provoking a TypeError.
694 self.assertRaises(re.error, re.compile, 'foo[a-')
695
696 def test_bug_418626(self):
697 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
698 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
699 # pattern '*?' on a long string.
700 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
701 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
702 20003)
703 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000704 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000705 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000706 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000707
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300708 @requires_unicode
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000709 def test_bug_612074(self):
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300710 pat=u"["+re.escape(unichr(0x2039))+u"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000711 self.assertEqual(re.compile(pat) and 1, 1)
712
Skip Montanaro1e703c62003-04-25 15:40:28 +0000713 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000714 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000715 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000716 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
717 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
718 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000719
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200720 def test_unlimited_zero_width_repeat(self):
721 # Issue #9669
722 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
723 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
724 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
725 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
726 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
727 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
728
Skip Montanaro1e703c62003-04-25 15:40:28 +0000729 def test_scanner(self):
730 def s_ident(scanner, token): return token
731 def s_operator(scanner, token): return "op%s" % token
732 def s_float(scanner, token): return float(token)
733 def s_int(scanner, token): return int(token)
734
735 scanner = Scanner([
736 (r"[a-zA-Z_]\w*", s_ident),
737 (r"\d+\.\d*", s_float),
738 (r"\d+", s_int),
739 (r"=|\+|-|\*|/", s_operator),
740 (r"\s+", None),
741 ])
742
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300743 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000744
Skip Montanaro1e703c62003-04-25 15:40:28 +0000745 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
746 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
747 'op+', 'bar'], ''))
748
Skip Montanaro5ba00542003-04-25 16:00:14 +0000749 def test_bug_448951(self):
750 # bug 448951 (similar to 429357, but with single char match)
751 # (Also test greedy matches.)
752 for op in '','?','*':
753 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
754 (None, None))
755 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
756 ('a:', 'a'))
757
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000758 def test_bug_725106(self):
759 # capturing groups in alternatives in repeats
760 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
761 ('b', 'a'))
762 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
763 ('c', 'b'))
764 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
765 ('b', None))
766 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
767 ('b', None))
768 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
769 ('b', 'a'))
770 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
771 ('c', 'b'))
772 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
773 ('b', None))
774 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
775 ('b', None))
776
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000777 def test_bug_725149(self):
778 # mark_stack_base restoring before restoring marks
779 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
780 ('a', None))
781 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
782 ('a', None, None))
783
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300784 @requires_unicode
Just van Rossum12723ba2003-07-02 20:03:04 +0000785 def test_bug_764548(self):
786 # bug 764548, re.compile() barfs on str/unicode subclasses
Just van Rossum12723ba2003-07-02 20:03:04 +0000787 class my_unicode(unicode): pass
788 pat = re.compile(my_unicode("abc"))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300789 self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +0000790
Skip Montanaro5ba00542003-04-25 16:00:14 +0000791 def test_finditer(self):
792 iter = re.finditer(r":+", "a:b::c:::d")
793 self.assertEqual([item.group(0) for item in iter],
794 [":", "::", ":::"])
795
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300796 @requires_unicode
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000797 def test_bug_926075(self):
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300798 self.assertIsNot(re.compile('bug_926075'),
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300799 re.compile(u'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000800
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300801 @requires_unicode
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000802 def test_bug_931848(self):
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300803 pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000804 self.assertEqual(re.compile(pattern).split("a.b.c"),
805 ['a','b','c'])
806
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000807 def test_bug_581080(self):
808 iter = re.finditer(r"\s", "a b")
809 self.assertEqual(iter.next().span(), (1,2))
810 self.assertRaises(StopIteration, iter.next)
811
812 scanner = re.compile(r"\s").scanner("a b")
813 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300814 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000815
816 def test_bug_817234(self):
817 iter = re.finditer(r".*", "asdf")
818 self.assertEqual(iter.next().span(), (0, 4))
819 self.assertEqual(iter.next().span(), (4, 4))
820 self.assertRaises(StopIteration, iter.next)
821
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300822 @requires_unicode
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000823 def test_bug_6561(self):
824 # '\d' should match characters in Unicode category 'Nd'
825 # (Number, Decimal Digit), but not those in 'Nl' (Number,
826 # Letter) or 'No' (Number, Other).
827 decimal_digits = [
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300828 unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
829 unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
830 unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000831 ]
832 for x in decimal_digits:
833 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
834
835 not_decimal_digits = [
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300836 unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
837 unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
838 unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
839 unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000840 ]
841 for x in not_decimal_digits:
842 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
843
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000844 def test_empty_array(self):
845 # SF buf 1647541
846 import array
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300847 typecodes = 'cbBhHiIlLfd'
848 if have_unicode:
849 typecodes += 'u'
850 for typecode in typecodes:
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000851 a = array.array(typecode)
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300852 self.assertIsNone(re.compile("bla").match(a))
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000853 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000854
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300855 @requires_unicode
Guido van Rossumae04c332008-01-03 19:12:44 +0000856 def test_inline_flags(self):
857 # Bug #1700
858 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
859 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
860
861 p = re.compile(upper_char, re.I | re.U)
862 q = p.match(lower_char)
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300863 self.assertTrue(q)
Guido van Rossumae04c332008-01-03 19:12:44 +0000864
865 p = re.compile(lower_char, re.I | re.U)
866 q = p.match(upper_char)
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300867 self.assertTrue(q)
Guido van Rossumae04c332008-01-03 19:12:44 +0000868
869 p = re.compile('(?i)' + upper_char, re.U)
870 q = p.match(lower_char)
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300871 self.assertTrue(q)
Guido van Rossumae04c332008-01-03 19:12:44 +0000872
873 p = re.compile('(?i)' + lower_char, re.U)
874 q = p.match(upper_char)
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300875 self.assertTrue(q)
Guido van Rossumae04c332008-01-03 19:12:44 +0000876
877 p = re.compile('(?iu)' + upper_char)
878 q = p.match(lower_char)
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300879 self.assertTrue(q)
Guido van Rossumae04c332008-01-03 19:12:44 +0000880
881 p = re.compile('(?iu)' + lower_char)
882 q = p.match(upper_char)
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300883 self.assertTrue(q)
Guido van Rossumae04c332008-01-03 19:12:44 +0000884
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000885 def test_dollar_matches_twice(self):
886 "$ matches the end of string, and just before the terminating \n"
887 pattern = re.compile('$')
888 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
889 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
890 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
891
892 pattern = re.compile('$', re.MULTILINE)
893 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
894 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
895 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
896
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000897 def test_dealloc(self):
898 # issue 3299: check for segfault in debug build
899 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000900 # the overflow limit is different on wide and narrow builds and it
901 # depends on the definition of SRE_CODE (see sre.h).
902 # 2**128 should be big enough to overflow on both. For smaller values
903 # a RuntimeError is raised instead of OverflowError.
904 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000905 self.assertRaises(TypeError, re.finditer, "a", {})
906 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000907
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200908 def test_compile(self):
909 # Test return value when given string and pattern as parameter
910 pattern = re.compile('random pattern')
911 self.assertIsInstance(pattern, re._pattern_type)
912 same_pattern = re.compile(pattern)
913 self.assertIsInstance(same_pattern, re._pattern_type)
914 self.assertIs(same_pattern, pattern)
915 # Test behaviour when not given a string or pattern as parameter
916 self.assertRaises(TypeError, re.compile, 0)
917
Ezio Melotti5c4e32b2013-01-11 08:32:01 +0200918 def test_bug_13899(self):
919 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
920 # nothing. Ditto B and Z.
921 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
922 ['A', 'B', '\b', 'C', 'Z'])
923
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100924 @precisionbigmemtest(size=_2G, memuse=1)
925 def test_large_search(self, size):
926 # Issue #10182: indices were 32-bit-truncated.
927 s = 'a' * size
928 m = re.search('$', s)
929 self.assertIsNotNone(m)
Antoine Pitrou74635c92012-12-03 21:08:43 +0100930 self.assertEqual(m.start(), size)
931 self.assertEqual(m.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100932
Antoine Pitroub83575b2012-12-02 12:52:36 +0100933 # The huge memuse is because of re.sub() using a list and a join()
934 # to create the replacement result.
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100935 @precisionbigmemtest(size=_2G, memuse=16 + 2)
936 def test_large_subn(self, size):
Antoine Pitroub83575b2012-12-02 12:52:36 +0100937 # Issue #10182: indices were 32-bit-truncated.
938 s = 'a' * size
Antoine Pitroub83575b2012-12-02 12:52:36 +0100939 r, n = re.subn('', '', s)
940 self.assertEqual(r, s)
941 self.assertEqual(n, size + 1)
942
943
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200944 def test_repeat_minmax_overflow(self):
945 # Issue #13169
946 string = "x" * 100000
947 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
948 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
949 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
950 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
951 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
952 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
953 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
954 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
955 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
956 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
957 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
958
959 @cpython_only
960 def test_repeat_minmax_overflow_maxrepeat(self):
961 try:
962 from _sre import MAXREPEAT
963 except ImportError:
964 self.skipTest('requires _sre.MAXREPEAT constant')
965 string = "x" * 100000
966 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
967 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
968 (0, 100000))
969 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
970 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
971 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
972 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
973
R David Murray60773392013-04-14 13:08:50 -0400974 def test_backref_group_name_in_exception(self):
975 # Issue 17341: Poor error message when compiling invalid regex
976 with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
977 re.compile('(?P=<foo>)')
978
979 def test_group_name_in_exception(self):
980 # Issue 17341: Poor error message when compiling invalid regex
981 with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
982 re.compile('(?P<?foo>)')
983
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300984 def test_issue17998(self):
985 for reps in '*', '+', '?', '{1}':
986 for mod in '', '?':
987 pattern = '.' + reps + mod + 'yz'
988 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
989 ['xyz'], msg=pattern)
Serhiy Storchaka7644ff12014-09-14 17:40:44 +0300990 if have_unicode:
991 pattern = unicode(pattern)
992 self.assertEqual(re.compile(pattern, re.S).findall(u'xyz'),
993 [u'xyz'], msg=pattern)
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300994
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200995
Serhiy Storchaka83737c62013-08-19 23:20:07 +0300996 def test_bug_2537(self):
997 # issue 2537: empty submatches
998 for outer_op in ('{0,}', '*', '+', '{1,187}'):
999 for inner_op in ('{0,}', '*', '?'):
1000 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1001 m = r.match("xyyzy")
1002 self.assertEqual(m.group(0), "xyy")
1003 self.assertEqual(m.group(1), "")
1004 self.assertEqual(m.group(2), "y")
1005
Antoine Pitrouf5814112014-02-03 20:59:59 +01001006 def test_debug_flag(self):
Serhiy Storchakac0799e32014-09-21 22:47:30 +03001007 pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
Antoine Pitrouf5814112014-02-03 20:59:59 +01001008 with captured_stdout() as out:
Serhiy Storchakac0799e32014-09-21 22:47:30 +03001009 re.compile(pat, re.DEBUG)
1010 dump = '''\
1011subpattern 1
1012 literal 46
1013subpattern None
1014 branch
1015 in
1016 literal 99
1017 literal 104
1018 or
1019 literal 112
1020 literal 121
1021subpattern None
1022 groupref_exists 1
1023 at at_end
1024 else
1025 literal 58
1026 literal 32
1027'''
1028 self.assertEqual(out.getvalue(), dump)
Antoine Pitrouf5814112014-02-03 20:59:59 +01001029 # Debug output is output again even a second time (bypassing
1030 # the cache -- issue #20426).
1031 with captured_stdout() as out:
Serhiy Storchakac0799e32014-09-21 22:47:30 +03001032 re.compile(pat, re.DEBUG)
1033 self.assertEqual(out.getvalue(), dump)
Antoine Pitrouf5814112014-02-03 20:59:59 +01001034
Serhiy Storchakae50fe4c2014-03-06 12:24:29 +02001035 def test_keyword_parameters(self):
1036 # Issue #20283: Accepting the string keyword parameter.
1037 pat = re.compile(r'(ab)')
1038 self.assertEqual(
1039 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1040 self.assertEqual(
1041 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1042 self.assertEqual(
1043 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1044 self.assertEqual(
1045 pat.split(string='abracadabra', maxsplit=1),
1046 ['', 'ab', 'racadabra'])
1047
Benjamin Petersonbc4ece52014-09-30 22:04:28 -04001048 def test_match_group_takes_long(self):
1049 self.assertEqual(re.match("(foo)", "foo").group(1L), "foo")
1050 self.assertRaises(IndexError, re.match("", "").group, sys.maxint + 1)
1051
Serhiy Storchakad4c72902014-10-31 00:53:19 +02001052 def test_locale_caching(self):
1053 # Issue #22410
1054 oldlocale = locale.setlocale(locale.LC_CTYPE)
1055 self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
1056 for loc in 'en_US.iso88591', 'en_US.utf8':
1057 try:
1058 locale.setlocale(locale.LC_CTYPE, loc)
1059 except locale.Error:
1060 # Unsupported locale on this system
1061 self.skipTest('test needs %s locale' % loc)
1062
1063 re.purge()
1064 self.check_en_US_iso88591()
1065 self.check_en_US_utf8()
1066 re.purge()
1067 self.check_en_US_utf8()
1068 self.check_en_US_iso88591()
1069
1070 def check_en_US_iso88591(self):
1071 locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
1072 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1073 self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
1074 self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
1075 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1076 self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
1077 self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
1078
1079 def check_en_US_utf8(self):
1080 locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
1081 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1082 self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
1083 self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
1084 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1085 self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
1086 self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
1087
Antoine Pitrouf5814112014-02-03 20:59:59 +01001088
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001089def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +00001090 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001091 if verbose:
1092 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001093 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001094 # To save time, only run the first and last 10 tests
1095 #tests = tests[:10] + tests[-10:]
1096 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001097
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001098 for t in tests:
1099 sys.stdout.flush()
1100 pattern = s = outcome = repl = expected = None
1101 if len(t) == 5:
1102 pattern, s, outcome, repl, expected = t
1103 elif len(t) == 3:
1104 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001105 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001106 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
1107
Guido van Rossum41360a41998-03-26 19:42:58 +00001108 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001109 obj = re.compile(pattern)
1110 except re.error:
1111 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +00001112 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001113 print '=== Syntax error:', t
1114 except KeyboardInterrupt: raise KeyboardInterrupt
1115 except:
1116 print '*** Unexpected error ***', t
1117 if verbose:
1118 traceback.print_exc(file=sys.stdout)
1119 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +00001120 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001121 result = obj.search(s)
1122 except re.error, msg:
1123 print '=== Unexpected exception', t, repr(msg)
1124 if outcome == SYNTAX_ERROR:
1125 # This should have been a syntax error; forget it.
1126 pass
1127 elif outcome == FAIL:
1128 if result is None: pass # No match, as expected
1129 else: print '=== Succeeded incorrectly', t
1130 elif outcome == SUCCEED:
1131 if result is not None:
1132 # Matched, as expected, so now we compute the
1133 # result string and compare it to our expected result.
1134 start, end = result.span(0)
1135 vardict={'found': result.group(0),
1136 'groups': result.group(),
1137 'flags': result.re.flags}
1138 for i in range(1, 100):
1139 try:
1140 gi = result.group(i)
1141 # Special hack because else the string concat fails:
1142 if gi is None:
1143 gi = "None"
1144 except IndexError:
1145 gi = "Error"
1146 vardict['g%d' % i] = gi
1147 for i in result.re.groupindex.keys():
1148 try:
1149 gi = result.group(i)
1150 if gi is None:
1151 gi = "None"
1152 except IndexError:
1153 gi = "Error"
1154 vardict[i] = gi
1155 repl = eval(repl, vardict)
1156 if repl != expected:
1157 print '=== grouping error', t,
1158 print repr(repl) + ' should be ' + repr(expected)
1159 else:
1160 print '=== Failed incorrectly', t
1161
1162 # Try the match on a unicode string, and check that it
1163 # still succeeds.
1164 try:
1165 result = obj.search(unicode(s, "latin-1"))
1166 if result is None:
1167 print '=== Fails on unicode match', t
1168 except NameError:
1169 continue # 1.5.2
1170 except TypeError:
1171 continue # unicode test case
1172
1173 # Try the match on a unicode pattern, and check that it
1174 # still succeeds.
1175 obj=re.compile(unicode(pattern, "latin-1"))
1176 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +00001177 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001178 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001179
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001180 # Try the match with the search area limited to the extent
1181 # of the match and see if it still succeeds. \B will
1182 # break (because it won't match at the end or start of a
1183 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001184
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001185 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1186 and result is not None:
1187 obj = re.compile(pattern)
1188 result = obj.search(s, result.start(0), result.end(0) + 1)
1189 if result is None:
1190 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001191
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001192 # Try the match with IGNORECASE enabled, and check that it
1193 # still succeeds.
1194 obj = re.compile(pattern, re.IGNORECASE)
1195 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001196 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001197 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001198
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001199 # Try the match with LOCALE enabled, and check that it
1200 # still succeeds.
1201 obj = re.compile(pattern, re.LOCALE)
1202 result = obj.search(s)
1203 if result is None:
1204 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001205
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001206 # Try the match with UNICODE locale enabled, and check
1207 # that it still succeeds.
1208 obj = re.compile(pattern, re.UNICODE)
1209 result = obj.search(s)
1210 if result is None:
1211 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001212
def test_main():
    # Run the unittest-based cases first, then the table-driven checks.
    run_unittest(ReTests)
    run_re_tests()


# Allow the module to be executed directly as a script.
if __name__ == '__main__':
    test_main()