blob: d85b767f12ca3dc257a65d3a0ef4d247a7640e28 [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
Antoine Pitroud2cc7432014-02-03 20:59:59 +01002 cpython_only, captured_stdout
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00005from re import Scanner
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02006import sre_compile
R David Murray26dfaac92013-04-14 13:00:54 -04007import sre_constants
Ezio Melottid2114eb2011-03-25 14:08:44 +02008import sys
9import string
10import traceback
Antoine Pitrou79aa68d2013-10-25 21:36:10 +020011import unittest
Raymond Hettinger027bb632004-05-31 03:09:25 +000012from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000013
Guido van Rossum23b22571997-07-17 22:36:14 +000014# Misc tests from Tim Peters' re.doc
15
Just van Rossum6802c6e2003-07-02 14:36:59 +000016# WARNING: Don't change details in these tests if you don't know
Ezio Melotti42da6632011-03-15 05:18:48 +020017# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000018# cover most of the code.
19
class S(str):
    """str subclass whose indexing and slicing results are again S instances."""

    def __getitem__(self, index):
        item = super().__getitem__(index)
        return S(item)
23
class B(bytes):
    """bytes subclass whose indexing and slicing results are wrapped in B."""

    def __getitem__(self, index):
        item = super().__getitem__(index)
        return B(item)
27
Skip Montanaro8ed06da2003-04-24 19:43:18 +000028class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000029
def assertTypedEqual(self, actual, expect, msg=None):
    """Assert that *actual* equals *expect* and that their types match exactly.

    After the plain equality check, tuples and lists in *expect* are walked
    recursively (paired with *actual* via zip) and every non-sequence leaf
    must have the identical type on both sides.
    """
    self.assertEqual(actual, expect, msg)

    def check_types(got, wanted):
        # Leaves are compared by exact type; containers are descended into.
        if not isinstance(wanted, (tuple, list)):
            self.assertIs(type(got), type(wanted), msg)
            return
        for got_item, wanted_item in zip(got, wanted):
            check_types(got_item, wanted_item)

    check_types(actual, expect)
39
Benjamin Petersone48944b2012-03-07 14:50:25 -060040 def test_keep_buffer(self):
41 # See bug 14212
42 b = bytearray(b'x')
43 it = re.finditer(b'a', b)
44 with self.assertRaises(BufferError):
45 b.extend(b'x'*400)
46 list(it)
47 del it
48 gc_collect()
49 b.extend(b'x'*400)
50
Raymond Hettinger027bb632004-05-31 03:09:25 +000051 def test_weakref(self):
52 s = 'QabbbcR'
53 x = re.compile('ab+c')
54 y = proxy(x)
55 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
56
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 def test_search_star_plus(self):
58 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
59 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
60 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
61 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030062 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000063 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
64 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
65 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
66 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030067 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000068
def bump_num(self, matchobj):
    """Return the whole match, read as an integer, plus one, as a string."""
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000072
def test_basic_re_sub(self):
    # sub() must return an exact str/bytes object even when the replacement
    # and subject are subclasses, bytearrays or memoryviews.
    self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
    self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
    self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
    self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
    # One sample each from Latin-1, the BMP and an astral plane.
    for y in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')

    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    # Callable replacement, without and with a positional count.
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted verbatim; a template string has
    # its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Raw strings: "\g" is not a valid Python string escape, so the non-raw
    # spelling only works by accident and triggers a warning on newer
    # interpreters.  The string values are unchanged.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +0000109
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000110 def test_bug_449964(self):
111 # fails for group followed by other escape
112 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
113 'xx\bxx\b')
114
115 def test_bug_449000(self):
116 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000117 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
118 'abc\ndef\n')
119 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
120 'abc\ndef\n')
121 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
122 'abc\ndef\n')
123 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
124 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +0000125
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000126 def test_bug_1661(self):
127 # Verify that flags do not get silently ignored with compiled patterns
128 pattern = re.compile('.')
129 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
130 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
131 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
132 self.assertRaises(ValueError, re.compile, pattern, re.I)
133
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000134 def test_bug_3629(self):
135 # A regex that triggered a bug in the sre-code validator
136 re.compile("(?P<quote>)(?(quote))")
137
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000138 def test_sub_template_numeric_escape(self):
139 # bug 776311 and friends
140 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
141 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
142 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
143 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
144 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
145 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
146 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
147
148 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
149 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
150
151 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
152 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
153 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
154 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
155 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
156
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300157 self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
158 self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
Tim Peters0e9980f2004-09-12 03:49:31 +0000159
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000160 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
161 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
162 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
163 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
164 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
165 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
166 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
167 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
168 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
169 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
170 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
171 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
172
173 # in python2.3 (etc), these loop endlessly in sre_parser.py
174 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
175 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
176 'xz8')
177 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
178 'xza')
179
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000180 def test_qualified_re_sub(self):
181 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
182 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000183
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000184 def test_bug_114660(self):
185 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
186 'hello there')
187
188 def test_bug_462270(self):
189 # Test for empty sub() behaviour, see SF bug #462270
190 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
191 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
192
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200193 def test_symbolic_groups(self):
194 re.compile('(?P<a>x)(?P=a)(?(a)y)')
195 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
196 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
197 self.assertRaises(re.error, re.compile, '(?Px)')
198 self.assertRaises(re.error, re.compile, '(?P=)')
199 self.assertRaises(re.error, re.compile, '(?P=1)')
200 self.assertRaises(re.error, re.compile, '(?P=a)')
201 self.assertRaises(re.error, re.compile, '(?P=a1)')
202 self.assertRaises(re.error, re.compile, '(?P=a.)')
203 self.assertRaises(re.error, re.compile, '(?P<)')
204 self.assertRaises(re.error, re.compile, '(?P<>)')
205 self.assertRaises(re.error, re.compile, '(?P<1>)')
206 self.assertRaises(re.error, re.compile, '(?P<a.>)')
207 self.assertRaises(re.error, re.compile, '(?())')
208 self.assertRaises(re.error, re.compile, '(?(a))')
209 self.assertRaises(re.error, re.compile, '(?(1a))')
210 self.assertRaises(re.error, re.compile, '(?(a.))')
Georg Brandl1d472b72013-04-14 11:40:00 +0200211 # New valid/invalid identifiers in Python 3
212 re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
213 re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
214 self.assertRaises(re.error, re.compile, '(?P<©>x)')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200215
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000216 def test_symbolic_refs(self):
217 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
218 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
219 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
220 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200221 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000222 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
223 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
224 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
225 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000226 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Georg Brandl1d472b72013-04-14 11:40:00 +0200227 # New valid/invalid identifiers in Python 3
228 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
229 self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
230 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000231
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000232 def test_re_subn(self):
233 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
234 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
235 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
236 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
237 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000238
def test_re_split(self):
    # str subjects (plain and subclass): the pieces must be exact str.
    str_cases = (
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
    )
    for text in (":a:b::c", S(":a:b::c")):
        for pattern, pieces in str_cases:
            self.assertTypedEqual(re.split(pattern, text), pieces)

    # bytes-like subjects: the pieces must be exact bytes.
    bytes_cases = (
        (b":", [b'', b'a', b'b', b'', b'c']),
        (b":*", [b'', b'a', b'b', b'c']),
        (b"(:*)", [b'', b':', b'a', b':', b'b', b'::', b'c']),
    )
    for data in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                 memoryview(b":a:b::c")):
        for pattern, pieces in bytes_cases:
            self.assertTypedEqual(re.split(pattern, data), pieces)

    # Same splits with non-ASCII fields (Latin-1, BMP and astral samples).
    for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                    "\U0001d49c\U0001d49e\U0001d4b5"):
        text = ":%s:%s::%s" % (a, b, c)
        self.assertEqual(re.split(":", text), ['', a, b, '', c])
        self.assertEqual(re.split(":*", text), ['', a, b, c])
        self.assertEqual(re.split("(:*)", text),
                         ['', ':', a, ':', b, '::', c])

    # Capturing vs. non-capturing separators on the plain ASCII subject.
    grouping_cases = (
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    )
    for pattern, pieces in grouping_cases:
        self.assertEqual(re.split(pattern, ":a:b::c"), pieces)
Guido van Rossum49946571997-07-18 04:26:25 +0000273
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000274 def test_qualified_re_split(self):
275 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
276 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
277 self.assertEqual(re.split("(:)", ":a:b::c", 2),
278 ['', ':', 'a', ':', 'b::c'])
279 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
280 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000281
def test_re_findall(self):
    self.assertEqual(re.findall(":+", "abc"), [])

    # str subjects (plain and subclass): results must be exact str.
    str_cases = (
        (":+", [":", "::", ":::"]),
        ("(:+)", [":", "::", ":::"]),
        ("(:)(:*)", [(":", ""), (":", ":"), (":", "::")]),
    )
    for text in ("a:b::c:::d", S("a:b::c:::d")):
        for pattern, found in str_cases:
            self.assertTypedEqual(re.findall(pattern, text), found)

    # bytes-like subjects: results must be exact bytes.
    bytes_cases = (
        (b":+", [b":", b"::", b":::"]),
        (b"(:+)", [b":", b"::", b":::"]),
        (b"(:)(:*)", [(b":", b""), (b":", b":"), (b":", b"::")]),
    )
    for data in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
                 memoryview(b"a:b::c:::d")):
        for pattern, found in bytes_cases:
            self.assertTypedEqual(re.findall(pattern, data), found)

    # Same shapes with a non-ASCII repeated character.
    for x in ("\xe0", "\u0430", "\U0001d49c"):
        xx = x * 2
        xxx = x * 3
        text = "a%sb%sc%sd" % (x, xx, xxx)
        self.assertEqual(re.findall("%s+" % x, text), [x, xx, xxx])
        self.assertEqual(re.findall("(%s+)" % x, text), [x, xx, xxx])
        self.assertEqual(re.findall("(%s)(%s*)" % (x, x), text),
                         [(x, ""), (x, x), (x, xx)])
Guido van Rossum49946571997-07-18 04:26:25 +0000307
Skip Montanaro5ba00542003-04-25 16:00:14 +0000308 def test_bug_117612(self):
309 self.assertEqual(re.findall(r"(a|(b))", "aba"),
310 [("a", ""),("b", "b"),("a", "")])
311
def test_re_match(self):
    # groups()/group() on str subjects (plain and subclass).
    for string in 'a', S('a'):
        self.assertEqual(re.match('a', string).groups(), ())
        self.assertEqual(re.match('(a)', string).groups(), ('a',))
        self.assertEqual(re.match('(a)', string).group(0), 'a')
        self.assertEqual(re.match('(a)', string).group(1), 'a')
        self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
    # The same on bytes-like subjects.
    for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
        self.assertEqual(re.match(b'a', string).groups(), ())
        self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
        self.assertEqual(re.match(b'(a)', string).group(0), b'a')
        self.assertEqual(re.match(b'(a)', string).group(1), b'a')
        self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
    # Non-ASCII single characters (Latin-1, BMP and astral samples).
    for a in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.match(a, a).groups(), ())
        self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
        self.assertEqual(re.match('(%s)' % a, a).group(0), a)
        self.assertEqual(re.match('(%s)' % a, a).group(1), a)
        self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))

    # Groups that did not participate are None, or the default passed to
    # groups().
    pat = re.compile('((a)|(b))(c)?')
    self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
    self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
    self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
    self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
    self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

    # A single group
    m = re.match('(a)', 'a')
    # The same assertion used to appear twice; the second copy now covers
    # the no-argument form of group() instead.
    self.assertEqual(m.group(), 'a')
    self.assertEqual(m.group(0), 'a')
    self.assertEqual(m.group(1), 'a')
    self.assertEqual(m.group(1, 1), ('a', 'a'))

    # Groups may be addressed by index, by name, or a mix of both.
    pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                     (None, 'b', None))
    self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000351
def test_re_fullmatch(self):
    # Issue 16203: Proposal: add re.fullmatch() method.
    self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
    # The shorter first alternative must not stop the longer one from
    # matching the whole subject.
    for text in ("ab", S("ab")):
        self.assertEqual(re.fullmatch(r"a|ab", text).span(), (0, 2))
    for data in (b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab")):
        self.assertEqual(re.fullmatch(br"a|ab", data).span(), (0, 2))
    for a, b in ("\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e"):
        r = r"%s|%s" % (a, a + b)
        self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))

    # Lazy quantifiers must still extend to the end of the subject.
    lazy_cases = (
        (r".*?$", "abc", (0, 3)),
        (r".*?", "abc", (0, 3)),
        (r"a.*?b", "ab", (0, 2)),
        (r"a.*?b", "abb", (0, 3)),
        (r"a.*?b", "axxb", (0, 4)),
    )
    for pattern, text, span in lazy_cases:
        self.assertEqual(re.fullmatch(pattern, text).span(), span)

    # Subjects with something left over after the pattern must not match.
    for pattern, text in ((r"a+", "ab"),
                          (r"abc$", "abc\n"),
                          (r"abc\Z", "abc\n"),
                          (r"(?m)abc$", "abc\n")):
        self.assertIsNone(re.fullmatch(pattern, text))

    # Look-ahead and look-behind assertions inside the pattern.
    lookaround_cases = (
        (r"ab(?=c)cd", "abcd", (0, 4)),
        (r"ab(?<=b)cd", "abcd", (0, 4)),
        (r"(?=a|ab)ab", "ab", (0, 2)),
    )
    for pattern, text, span in lookaround_cases:
        self.assertEqual(re.fullmatch(pattern, text).span(), span)

    # With pos/endpos the match must cover exactly that window.
    for pattern in (r"bc", r".*?$", r".*?"):
        self.assertEqual(
            re.compile(pattern).fullmatch("abcd", pos=1, endpos=3).span(),
            (1, 3))
381
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000382 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000383 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
384 ('(', 'a'))
385 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
386 (None, 'a'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300387 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
388 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000389 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
390 ('a', 'b'))
391 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
392 (None, 'd'))
393 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
394 (None, 'd'))
395 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
396 ('a', ''))
397
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000398 # Tests for bug #1177831: exercise groups other than the first group
399 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
400 self.assertEqual(p.match('abc').groups(),
401 ('a', 'b', 'c'))
402 self.assertEqual(p.match('ad').groups(),
403 ('a', None, 'd'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300404 self.assertIsNone(p.match('abd'))
405 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000406
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000407
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000408 def test_re_groupref(self):
409 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
410 ('|', 'a'))
411 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
412 (None, 'a'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300413 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
414 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000415 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
416 ('a', 'a'))
417 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
418 (None, None))
419
420 def test_groupdict(self):
421 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
422 'first second').groupdict(),
423 {'first':'first', 'second':'second'})
424
425 def test_expand(self):
426 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
427 "first second")
428 .expand(r"\2 \1 \g<second> \g<first>"),
429 "second first second first")
430
431 def test_repeat_minmax(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300432 self.assertIsNone(re.match("^(\w){1}$", "abc"))
433 self.assertIsNone(re.match("^(\w){1}?$", "abc"))
434 self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
435 self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000436
437 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
438 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
439 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
440 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
441 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
442 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
443 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
444 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
445
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300446 self.assertIsNone(re.match("^x{1}$", "xxx"))
447 self.assertIsNone(re.match("^x{1}?$", "xxx"))
448 self.assertIsNone(re.match("^x{1,2}$", "xxx"))
449 self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000450
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300451 self.assertTrue(re.match("^x{3}$", "xxx"))
452 self.assertTrue(re.match("^x{1,3}$", "xxx"))
453 self.assertTrue(re.match("^x{1,4}$", "xxx"))
454 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
455 self.assertTrue(re.match("^x{3}?$", "xxx"))
456 self.assertTrue(re.match("^x{1,3}?$", "xxx"))
457 self.assertTrue(re.match("^x{1,4}?$", "xxx"))
458 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000459
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300460 self.assertIsNone(re.match("^x{}$", "xxx"))
461 self.assertTrue(re.match("^x{}$", "x{}"))
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000462
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000463 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000464 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000465 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000466 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
467 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
468 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
469 {'first': 1, 'other': 2})
470
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000471 self.assertEqual(re.match("(a)", "a").pos, 0)
472 self.assertEqual(re.match("(a)", "a").endpos, 1)
473 self.assertEqual(re.match("(a)", "a").string, "a")
474 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300475 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000476
477 def test_special_escapes(self):
478 self.assertEqual(re.search(r"\b(b.)\b",
479 "abcd abc bcd bx").group(1), "bx")
480 self.assertEqual(re.search(r"\B(b.)\B",
481 "abc bcd bc abxd").group(1), "bx")
482 self.assertEqual(re.search(r"\b(b.)\b",
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300483 "abcd abc bcd bx", re.ASCII).group(1), "bx")
484 self.assertEqual(re.search(r"\B(b.)\B",
485 "abc bcd bc abxd", re.ASCII).group(1), "bx")
486 self.assertEqual(re.search(r"\b(b.)\b",
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000487 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
488 self.assertEqual(re.search(r"\B(b.)\B",
489 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000490 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
491 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300492 self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300493 self.assertEqual(re.search(br"\b(b.)\b",
494 b"abcd abc bcd bx").group(1), b"bx")
495 self.assertEqual(re.search(br"\B(b.)\B",
496 b"abc bcd bc abxd").group(1), b"bx")
497 self.assertEqual(re.search(br"\b(b.)\b",
498 b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
499 self.assertEqual(re.search(br"\B(b.)\B",
500 b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
501 self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
502 self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300503 self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000504 self.assertEqual(re.search(r"\d\D\w\W\s\S",
505 "1aa! a").group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300506 self.assertEqual(re.search(br"\d\D\w\W\s\S",
507 b"1aa! a").group(0), b"1aa! a")
508 self.assertEqual(re.search(r"\d\D\w\W\s\S",
509 "1aa! a", re.ASCII).group(0), "1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000510 self.assertEqual(re.search(r"\d\D\w\W\s\S",
511 "1aa! a", re.LOCALE).group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300512 self.assertEqual(re.search(br"\d\D\w\W\s\S",
513 b"1aa! a", re.LOCALE).group(0), b"1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000514
Ezio Melotti5a045b92012-02-29 11:48:44 +0200515 def test_string_boundaries(self):
516 # See http://bugs.python.org/issue10713
517 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
518 "abc")
519 # There's a word boundary at the start of a string.
520 self.assertTrue(re.match(r"\b", "abc"))
521 # A non-empty string includes a non-boundary zero-length match.
522 self.assertTrue(re.search(r"\B", "abc"))
523 # There is no non-boundary match at the start of a string.
524 self.assertFalse(re.match(r"\B", "abc"))
525 # However, an empty string contains no word boundaries, and also no
526 # non-boundaries.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300527 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200528 # This one is questionable and different from the perlre behaviour,
529 # but describes current behavior.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300530 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200531 # A single word-character string has two boundaries, but no
532 # non-boundary gaps.
533 self.assertEqual(len(re.findall(r"\b", "a")), 2)
534 self.assertEqual(len(re.findall(r"\B", "a")), 0)
535 # If there are no words, there are no boundaries
536 self.assertEqual(len(re.findall(r"\b", " ")), 0)
537 self.assertEqual(len(re.findall(r"\b", " ")), 0)
538 # Can match around the whitespace.
539 self.assertEqual(len(re.findall(r"\B", " ")), 2)
540
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000541 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000542 self.assertEqual(re.match("([\u2222\u2223])",
543 "\u2222").group(1), "\u2222")
Serhiy Storchakabe80fc92013-10-24 22:02:58 +0300544 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300545 self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000546
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100547 def test_big_codesize(self):
548 # Issue #1160
549 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300550 self.assertTrue(r.match('1000'))
551 self.assertTrue(r.match('9999'))
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100552
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000553 def test_anyall(self):
554 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
555 "a\nb")
556 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
557 "a\n\nb")
558
559 def test_non_consuming(self):
560 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
561 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
562 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
563 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
564 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
565 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
566 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
567
568 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
569 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
570 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
571 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
572
573 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000574 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300575 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000576 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
577 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
578 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
579 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
580 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
581 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
582 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
583 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
584
585 def test_category(self):
586 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
587
588 def test_getlower(self):
589 import _sre
590 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
591 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
592 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
593
594 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300595 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000596
597 def test_not_literal(self):
598 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
599 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
600
601 def test_search_coverage(self):
602 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
603 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
604
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds with the given matched
    # text and span.  `match` and `span` must be given together; when both
    # are omitted the pattern is expected to match the whole text.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000618
def test_re_escape(self):
    # re.escape() on each of the first 256 code points: alphanumerics and
    # "_" are left alone, NUL becomes "\000", anything else gets a
    # backslash; the escaped form must match the original character.
    word_chars = string.ascii_letters + string.digits + '_'
    latin1 = ''.join(map(chr, range(256)))
    for char in latin1:
        if char in word_chars:
            expected = char
        elif char == '\x00':
            expected = '\\000'
        else:
            expected = '\\' + char
        self.assertEqual(re.escape(char), expected)
        self.assertMatch(re.escape(char), char)
    self.assertMatch(re.escape(latin1), latin1)
Guido van Rossum49946571997-07-18 04:26:25 +0000631
def test_re_escape_byte(self):
    # Bytes counterpart of the str escape test, over all 256 byte values.
    word_bytes = (string.ascii_letters + string.digits + '_').encode('ascii')
    all_bytes = bytes(range(256))
    for value in all_bytes:
        single = bytes([value])
        if single in word_bytes:
            expected = single
        elif value == 0:
            expected = b'\\000'
        else:
            expected = b'\\' + single
        self.assertEqual(re.escape(single), expected)
        self.assertMatch(re.escape(single), single)
    self.assertMatch(re.escape(all_bytes), all_bytes)
Guido van Rossum698280d2008-09-10 17:44:35 +0000645
def test_re_escape_non_ascii(self):
    # Non-ASCII characters are backslash-escaped and still match.
    text = 'xxx\u2620\u2620\u2620xxx'
    escaped = re.escape(text)
    self.assertEqual(escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(escaped, text)
    # An escaped character can be quantified inside a larger pattern.
    quantified = '.%s+.' % re.escape('\u2620')
    self.assertMatch(quantified, text,
                     match='x\u2620\u2620\u2620x', span=(2, 7),
                     matcher=re.search)
653
def test_re_escape_non_ascii_bytes(self):
    # Each byte of a UTF-8 encoded non-ASCII character is escaped.
    data = 'y\u2620y\u2620y'.encode('utf-8')
    escaped = re.escape(data)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, data)
    # The escaped three-byte sequence occurs twice in the data.
    needle = re.escape('\u2620'.encode('utf-8'))
    occurrences = re.findall(needle, data)
    self.assertEqual(len(occurrences), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000661
Serhiy Storchakab85a9762014-09-15 11:33:19 +0300662 def test_pickling(self):
663 import pickle
664 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
665 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
666 pickled = pickle.dumps(oldpat, proto)
667 newpat = pickle.loads(pickled)
668 self.assertEqual(newpat, oldpat)
669 # current pickle expects the _compile() reconstructor in re module
670 from re import _compile
Guido van Rossum23b22571997-07-17 22:36:14 +0000671
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000672 def test_constants(self):
673 self.assertEqual(re.I, re.IGNORECASE)
674 self.assertEqual(re.L, re.LOCALE)
675 self.assertEqual(re.M, re.MULTILINE)
676 self.assertEqual(re.S, re.DOTALL)
677 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000678
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000679 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000680 for flag in [re.I, re.M, re.X, re.S, re.L]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300681 self.assertTrue(re.compile('^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +0000682
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000683 def test_sre_character_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200684 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
685 if i < 256:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300686 self.assertTrue(re.match(r"\%03o" % i, chr(i)))
687 self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
688 self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
689 self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
690 self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
691 self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200692 if i < 0x10000:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300693 self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
694 self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
695 self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
696 self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
697 self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
698 self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
699 self.assertTrue(re.match(r"\0", "\000"))
700 self.assertTrue(re.match(r"\08", "\0008"))
701 self.assertTrue(re.match(r"\01", "\001"))
702 self.assertTrue(re.match(r"\018", "\0018"))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300703 self.assertRaises(re.error, re.match, r"\567", "")
Antoine Pitrou463badf2012-06-23 13:29:19 +0200704 self.assertRaises(re.error, re.match, r"\911", "")
705 self.assertRaises(re.error, re.match, r"\x1", "")
706 self.assertRaises(re.error, re.match, r"\x1z", "")
707 self.assertRaises(re.error, re.match, r"\u123", "")
708 self.assertRaises(re.error, re.match, r"\u123z", "")
709 self.assertRaises(re.error, re.match, r"\U0001234", "")
710 self.assertRaises(re.error, re.match, r"\U0001234z", "")
711 self.assertRaises(re.error, re.match, r"\U00110000", "")
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000712
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000713 def test_sre_character_class_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200714 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
715 if i < 256:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300716 self.assertTrue(re.match(r"[\%o]" % i, chr(i)))
717 self.assertTrue(re.match(r"[\%o8]" % i, chr(i)))
718 self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
719 self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
720 self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
721 self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
722 self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
723 self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200724 if i < 0x10000:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300725 self.assertTrue(re.match(r"[\u%04x]" % i, chr(i)))
726 self.assertTrue(re.match(r"[\u%04x0]" % i, chr(i)))
727 self.assertTrue(re.match(r"[\u%04xz]" % i, chr(i)))
728 self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
729 self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
730 self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300731 self.assertRaises(re.error, re.match, r"[\567]", "")
Antoine Pitrou463badf2012-06-23 13:29:19 +0200732 self.assertRaises(re.error, re.match, r"[\911]", "")
733 self.assertRaises(re.error, re.match, r"[\x1z]", "")
734 self.assertRaises(re.error, re.match, r"[\u123z]", "")
735 self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
736 self.assertRaises(re.error, re.match, r"[\U00110000]", "")
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300737 self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200738
739 def test_sre_byte_literals(self):
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000740 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300741 self.assertTrue(re.match((r"\%03o" % i).encode(), bytes([i])))
742 self.assertTrue(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
743 self.assertTrue(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
744 self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
745 self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
746 self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
747 self.assertTrue(re.match(br"\u", b'u'))
748 self.assertTrue(re.match(br"\U", b'U'))
749 self.assertTrue(re.match(br"\0", b"\000"))
750 self.assertTrue(re.match(br"\08", b"\0008"))
751 self.assertTrue(re.match(br"\01", b"\001"))
752 self.assertTrue(re.match(br"\018", b"\0018"))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300753 self.assertRaises(re.error, re.match, br"\567", b"")
Antoine Pitrou463badf2012-06-23 13:29:19 +0200754 self.assertRaises(re.error, re.match, br"\911", b"")
755 self.assertRaises(re.error, re.match, br"\x1", b"")
756 self.assertRaises(re.error, re.match, br"\x1z", b"")
757
758 def test_sre_byte_class_literals(self):
759 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300760 self.assertTrue(re.match((r"[\%o]" % i).encode(), bytes([i])))
761 self.assertTrue(re.match((r"[\%o8]" % i).encode(), bytes([i])))
762 self.assertTrue(re.match((r"[\%03o]" % i).encode(), bytes([i])))
763 self.assertTrue(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
764 self.assertTrue(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
765 self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
766 self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
767 self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
768 self.assertTrue(re.match(br"[\u]", b'u'))
769 self.assertTrue(re.match(br"[\U]", b'U'))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300770 self.assertRaises(re.error, re.match, br"[\567]", b"")
Serhiy Storchakacd9032d2014-09-23 23:04:21 +0300771 self.assertRaises(re.error, re.match, br"[\911]", b"")
772 self.assertRaises(re.error, re.match, br"[\x1z]", b"")
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000773
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000774 def test_bug_113254(self):
775 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
776 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
777 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
778
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000779 def test_bug_527371(self):
780 # bug described in patches 527371/672491
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300781 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000782 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
783 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
784 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
785 self.assertEqual(re.match("((a))", "a").lastindex, 1)
786
787 def test_bug_545855(self):
788 # bug 545855 -- This pattern failed to cause a compile error as it
789 # should, instead provoking a TypeError.
790 self.assertRaises(re.error, re.compile, 'foo[a-')
791
792 def test_bug_418626(self):
793 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
794 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
795 # pattern '*?' on a long string.
796 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
797 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
798 20003)
799 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000800 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000801 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000802 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000803
804 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000805 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000806 self.assertEqual(re.compile(pat) and 1, 1)
807
Skip Montanaro1e703c62003-04-25 15:40:28 +0000808 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000809 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000810 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000811 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
812 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
813 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000814
Serhiy Storchakafa468162013-02-16 21:23:53 +0200815 def test_unlimited_zero_width_repeat(self):
816 # Issue #9669
817 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
818 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
819 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
820 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
821 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
822 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
823
Skip Montanaro1e703c62003-04-25 15:40:28 +0000824 def test_scanner(self):
825 def s_ident(scanner, token): return token
826 def s_operator(scanner, token): return "op%s" % token
827 def s_float(scanner, token): return float(token)
828 def s_int(scanner, token): return int(token)
829
830 scanner = Scanner([
831 (r"[a-zA-Z_]\w*", s_ident),
832 (r"\d+\.\d*", s_float),
833 (r"\d+", s_int),
834 (r"=|\+|-|\*|/", s_operator),
835 (r"\s+", None),
836 ])
837
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300838 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000839
Skip Montanaro1e703c62003-04-25 15:40:28 +0000840 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
841 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
842 'op+', 'bar'], ''))
843
Skip Montanaro5ba00542003-04-25 16:00:14 +0000844 def test_bug_448951(self):
845 # bug 448951 (similar to 429357, but with single char match)
846 # (Also test greedy matches.)
847 for op in '','?','*':
848 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
849 (None, None))
850 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
851 ('a:', 'a'))
852
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000853 def test_bug_725106(self):
854 # capturing groups in alternatives in repeats
855 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
856 ('b', 'a'))
857 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
858 ('c', 'b'))
859 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
860 ('b', None))
861 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
862 ('b', None))
863 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
864 ('b', 'a'))
865 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
866 ('c', 'b'))
867 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
868 ('b', None))
869 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
870 ('b', None))
871
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000872 def test_bug_725149(self):
873 # mark_stack_base restoring before restoring marks
874 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
875 ('a', None))
876 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
877 ('a', None, None))
878
Just van Rossum12723ba2003-07-02 20:03:04 +0000879 def test_bug_764548(self):
880 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000881 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000882 pat = re.compile(my_unicode("abc"))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300883 self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +0000884
Skip Montanaro5ba00542003-04-25 16:00:14 +0000885 def test_finditer(self):
886 iter = re.finditer(r":+", "a:b::c:::d")
887 self.assertEqual([item.group(0) for item in iter],
888 [":", "::", ":::"])
889
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600890 pat = re.compile(r":+")
891 iter = pat.finditer("a:b::c:::d", 1, 10)
892 self.assertEqual([item.group(0) for item in iter],
893 [":", "::", ":::"])
894
895 pat = re.compile(r":+")
896 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
897 self.assertEqual([item.group(0) for item in iter],
898 [":", "::", ":::"])
899
900 pat = re.compile(r":+")
901 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
902 self.assertEqual([item.group(0) for item in iter],
903 [":", "::", ":::"])
904
905 pat = re.compile(r":+")
906 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
907 self.assertEqual([item.group(0) for item in iter],
908 ["::", "::"])
909
Thomas Wouters40a088d2008-03-18 20:19:54 +0000910 def test_bug_926075(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300911 self.assertIsNot(re.compile('bug_926075'),
912 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000913
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000914 def test_bug_931848(self):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300915 pattern = "[\u002E\u3002\uFF0E\uFF61]"
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000916 self.assertEqual(re.compile(pattern).split("a.b.c"),
917 ['a','b','c'])
918
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000919 def test_bug_581080(self):
920 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000921 self.assertEqual(next(iter).span(), (1,2))
922 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000923
924 scanner = re.compile(r"\s").scanner("a b")
925 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300926 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000927
928 def test_bug_817234(self):
929 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000930 self.assertEqual(next(iter).span(), (0, 4))
931 self.assertEqual(next(iter).span(), (4, 4))
932 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000933
Mark Dickinson1f268282009-07-28 17:22:36 +0000934 def test_bug_6561(self):
935 # '\d' should match characters in Unicode category 'Nd'
936 # (Number, Decimal Digit), but not those in 'Nl' (Number,
937 # Letter) or 'No' (Number, Other).
938 decimal_digits = [
939 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
940 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
941 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
942 ]
943 for x in decimal_digits:
944 self.assertEqual(re.match('^\d$', x).group(0), x)
945
946 not_decimal_digits = [
947 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
948 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
949 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
950 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
951 ]
952 for x in not_decimal_digits:
953 self.assertIsNone(re.match('^\d$', x))
954
Guido van Rossumd8faa362007-04-27 19:54:29 +0000955 def test_empty_array(self):
956 # SF buf 1647541
957 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000958 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000959 a = array.array(typecode)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300960 self.assertIsNone(re.compile(b"bla").match(a))
Antoine Pitroufd036452008-08-19 17:56:33 +0000961 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000962
Christian Heimes072c0f12008-01-03 23:01:04 +0000963 def test_inline_flags(self):
964 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000965 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
966 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000967
968 p = re.compile(upper_char, re.I | re.U)
969 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300970 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000971
972 p = re.compile(lower_char, re.I | re.U)
973 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300974 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000975
976 p = re.compile('(?i)' + upper_char, re.U)
977 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300978 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000979
980 p = re.compile('(?i)' + lower_char, re.U)
981 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300982 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000983
984 p = re.compile('(?iu)' + upper_char)
985 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300986 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000987
988 p = re.compile('(?iu)' + lower_char)
989 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300990 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000991
Christian Heimes25bb7832008-01-11 16:17:00 +0000992 def test_dollar_matches_twice(self):
993 "$ matches the end of string, and just before the terminating \n"
994 pattern = re.compile('$')
995 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
996 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
997 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
998
999 pattern = re.compile('$', re.MULTILINE)
1000 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
1001 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
1002 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1003
Antoine Pitroufd036452008-08-19 17:56:33 +00001004 def test_bytes_str_mixing(self):
1005 # Mixing str and bytes is disallowed
1006 pat = re.compile('.')
1007 bpat = re.compile(b'.')
1008 self.assertRaises(TypeError, pat.match, b'b')
1009 self.assertRaises(TypeError, bpat.match, 'b')
1010 self.assertRaises(TypeError, pat.sub, b'b', 'c')
1011 self.assertRaises(TypeError, pat.sub, 'b', b'c')
1012 self.assertRaises(TypeError, pat.sub, b'b', b'c')
1013 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
1014 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
1015 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
1016
1017 def test_ascii_and_unicode_flag(self):
1018 # String patterns
1019 for flags in (0, re.UNICODE):
1020 pat = re.compile('\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001021 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001022 pat = re.compile('\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001023 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001024 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001025 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001026 pat = re.compile('(?a)\xc0', re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001027 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001028 pat = re.compile('\w', re.ASCII)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001029 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001030 pat = re.compile('(?a)\w')
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001031 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001032 # Bytes patterns
1033 for flags in (0, re.ASCII):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001034 pat = re.compile(b'\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001035 self.assertIsNone(pat.match(b'\xe0'))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001036 pat = re.compile(b'\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001037 self.assertIsNone(pat.match(b'\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001038 # Incompatibilities
1039 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
1040 self.assertRaises(ValueError, re.compile, b'(?u)\w')
1041 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
1042 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
1043 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
1044 self.assertRaises(ValueError, re.compile, '(?au)\w')
1045
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001046 def test_bug_6509(self):
1047 # Replacement strings of both types must parse properly.
1048 # all strings
1049 pat = re.compile('a(\w)')
1050 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
1051 pat = re.compile('a(.)')
1052 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
1053 pat = re.compile('..')
1054 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
1055
1056 # all bytes
1057 pat = re.compile(b'a(\w)')
1058 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
1059 pat = re.compile(b'a(.)')
1060 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
1061 pat = re.compile(b'..')
1062 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
1063
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001064 def test_dealloc(self):
1065 # issue 3299: check for segfault in debug build
1066 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +00001067 # the overflow limit is different on wide and narrow builds and it
1068 # depends on the definition of SRE_CODE (see sre.h).
1069 # 2**128 should be big enough to overflow on both. For smaller values
1070 # a RuntimeError is raised instead of OverflowError.
1071 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001072 self.assertRaises(TypeError, re.finditer, "a", {})
1073 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Victor Stinner5abeafb2010-03-04 21:59:53 +00001074 self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +00001075
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001076 def test_search_dot_unicode(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001077 self.assertTrue(re.search("123.*-", '123abc-'))
1078 self.assertTrue(re.search("123.*-", '123\xe9-'))
1079 self.assertTrue(re.search("123.*-", '123\u20ac-'))
1080 self.assertTrue(re.search("123.*-", '123\U0010ffff-'))
1081 self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082
Ezio Melottidf723e12012-03-13 01:29:48 +02001083 def test_compile(self):
1084 # Test return value when given string and pattern as parameter
1085 pattern = re.compile('random pattern')
1086 self.assertIsInstance(pattern, re._pattern_type)
1087 same_pattern = re.compile(pattern)
1088 self.assertIsInstance(same_pattern, re._pattern_type)
1089 self.assertIs(same_pattern, pattern)
1090 # Test behaviour when not given a string or pattern as parameter
1091 self.assertRaises(TypeError, re.compile, 0)
1092
Ezio Melottife8e6e72013-01-11 08:32:01 +02001093 def test_bug_13899(self):
1094 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
1095 # nothing. Ditto B and Z.
1096 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
1097 ['A', 'B', '\b', 'C', 'Z'])
1098
Antoine Pitroub33941a2012-12-03 20:55:56 +01001099 @bigmemtest(size=_2G, memuse=1)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001100 def test_large_search(self, size):
1101 # Issue #10182: indices were 32-bit-truncated.
1102 s = 'a' * size
1103 m = re.search('$', s)
1104 self.assertIsNotNone(m)
Antoine Pitrou86067c22012-12-03 21:08:43 +01001105 self.assertEqual(m.start(), size)
1106 self.assertEqual(m.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001107
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001108 # The huge memuse is because of re.sub() using a list and a join()
1109 # to create the replacement result.
Antoine Pitroub33941a2012-12-03 20:55:56 +01001110 @bigmemtest(size=_2G, memuse=16 + 2)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001111 def test_large_subn(self, size):
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001112 # Issue #10182: indices were 32-bit-truncated.
1113 s = 'a' * size
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001114 r, n = re.subn('', '', s)
1115 self.assertEqual(r, s)
1116 self.assertEqual(n, size + 1)
1117
Serhiy Storchakac1b59d42012-12-29 23:38:48 +02001118 def test_bug_16688(self):
1119 # Issue 16688: Backreferences make case-insensitive regex fail on
1120 # non-ASCII strings.
1121 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1122 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001123
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001124 def test_repeat_minmax_overflow(self):
1125 # Issue #13169
1126 string = "x" * 100000
1127 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1128 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1129 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1130 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1131 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1132 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1133 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1134 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1135 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1136 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1137 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1138
1139 @cpython_only
1140 def test_repeat_minmax_overflow_maxrepeat(self):
1141 try:
1142 from _sre import MAXREPEAT
1143 except ImportError:
1144 self.skipTest('requires _sre.MAXREPEAT constant')
1145 string = "x" * 100000
1146 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1147 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1148 (0, 100000))
1149 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1150 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1151 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1152 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1153
R David Murray26dfaac92013-04-14 13:00:54 -04001154 def test_backref_group_name_in_exception(self):
1155 # Issue 17341: Poor error message when compiling invalid regex
1156 with self.assertRaisesRegex(sre_constants.error, '<foo>'):
1157 re.compile('(?P=<foo>)')
1158
1159 def test_group_name_in_exception(self):
1160 # Issue 17341: Poor error message when compiling invalid regex
1161 with self.assertRaisesRegex(sre_constants.error, '\?foo'):
1162 re.compile('(?P<?foo>)')
1163
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001164 def test_issue17998(self):
1165 for reps in '*', '+', '?', '{1}':
1166 for mod in '', '?':
1167 pattern = '.' + reps + mod + 'yz'
1168 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1169 ['xyz'], msg=pattern)
1170 pattern = pattern.encode()
1171 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1172 [b'xyz'], msg=pattern)
1173
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03001174 def test_match_repr(self):
1175 for string in '[abracadabra]', S('[abracadabra]'):
1176 m = re.search(r'(.+)(.*?)\1', string)
1177 self.assertEqual(repr(m), "<%s.%s object; "
1178 "span=(1, 12), match='abracadabra'>" %
1179 (type(m).__module__, type(m).__qualname__))
1180 for string in (b'[abracadabra]', B(b'[abracadabra]'),
1181 bytearray(b'[abracadabra]'),
1182 memoryview(b'[abracadabra]')):
1183 m = re.search(rb'(.+)(.*?)\1', string)
1184 self.assertEqual(repr(m), "<%s.%s object; "
1185 "span=(1, 12), match=b'abracadabra'>" %
1186 (type(m).__module__, type(m).__qualname__))
1187
1188 first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
1189 self.assertEqual(repr(first), "<%s.%s object; "
1190 "span=(0, 2), match='aa'>" %
1191 (type(second).__module__, type(first).__qualname__))
1192 self.assertEqual(repr(second), "<%s.%s object; "
1193 "span=(3, 5), match='bb'>" %
1194 (type(second).__module__, type(second).__qualname__))
1195
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001196
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001197 def test_bug_2537(self):
1198 # issue 2537: empty submatches
1199 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1200 for inner_op in ('{0,}', '*', '?'):
1201 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1202 m = r.match("xyyzy")
1203 self.assertEqual(m.group(0), "xyy")
1204 self.assertEqual(m.group(1), "")
1205 self.assertEqual(m.group(2), "y")
1206
    def test_debug_flag(self):
        # Compiling with re.DEBUG must print a dump of the parsed pattern to
        # stdout.  The pattern combines a capturing group, a non-capturing
        # alternation and a conditional on group 1.
        pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
        with captured_stdout() as out:
            re.compile(pat, re.DEBUG)
        # Expected dump; it is compared verbatim, indentation included.
        dump = '''\
subpattern 1
  literal 46
subpattern None
  branch
    in
      literal 99
      literal 104
  or
    literal 112
    literal 121
subpattern None
  groupref_exists 1
    at at_end
  else
    literal 58
    literal 32
'''
        self.assertEqual(out.getvalue(), dump)
        # Debug output is output again even a second time (bypassing
        # the cache -- issue #20426).
        with captured_stdout() as out:
            re.compile(pat, re.DEBUG)
        self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001235
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001236 def test_keyword_parameters(self):
1237 # Issue #20283: Accepting the string keyword parameter.
1238 pat = re.compile(r'(ab)')
1239 self.assertEqual(
1240 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1241 self.assertEqual(
Serhiy Storchakaa537eb42014-03-06 11:36:15 +02001242 pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
1243 self.assertEqual(
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001244 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1245 self.assertEqual(
1246 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1247 self.assertEqual(
1248 pat.split(string='abracadabra', maxsplit=1),
1249 ['', 'ab', 'racadabra'])
1250 self.assertEqual(
1251 pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
1252 (7, 9))
1253
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001254 def test_bug_20998(self):
1255 # Issue #20998: Fullmatch of repeated single character pattern
1256 # with ignore case.
1257 self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
1258
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02001259
class PatternReprTests(unittest.TestCase):
    """
    Test the text produced by repr() for compiled pattern objects.
    """

    def check(self, pattern, expected):
        # Compile without explicit flags and compare the repr.
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        # Compile with the given flags and compare the repr.
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        self.check('random pattern', "re.compile('random pattern')")

    def test_single_flag(self):
        self.check_flags(
            'random pattern', re.IGNORECASE,
            "re.compile('random pattern', re.IGNORECASE)")

    def test_multiple_flags(self):
        combined = re.I | re.S | re.X
        self.check_flags(
            'random pattern', combined,
            "re.compile('random pattern', re.IGNORECASE|re.DOTALL|re.VERBOSE)")

    def test_unicode_flag(self):
        # re.U is not expected to appear in the repr of a str pattern.
        self.check_flags(
            'random pattern', re.U,
            "re.compile('random pattern')")
        self.check_flags(
            'random pattern', re.I | re.S | re.U,
            "re.compile('random pattern', re.IGNORECASE|re.DOTALL)")

    def test_inline_flags(self):
        self.check('(?i)pattern', "re.compile('(?i)pattern', re.IGNORECASE)")

    def test_unknown_flags(self):
        # Flag bits without a name are expected in hexadecimal, after any
        # named flags.
        self.check_flags(
            'random pattern', 0x123000,
            "re.compile('random pattern', 0x123000)")
        self.check_flags(
            'random pattern', 0x123000 | re.I,
            "re.compile('random pattern', re.IGNORECASE|0x123000)")

    def test_bytes(self):
        self.check(b'bytes pattern', "re.compile(b'bytes pattern')")
        self.check_flags(
            b'bytes pattern', re.A,
            "re.compile(b'bytes pattern', re.ASCII)")

    def test_quotes(self):
        self.check(
            'random "double quoted" pattern',
            '''re.compile('random "double quoted" pattern')''')
        self.check(
            "random 'single quoted' pattern",
            '''re.compile("random 'single quoted' pattern")''')
        self.check(
            '''both 'single' and "double" quotes''',
            '''re.compile('both \\'single\\' and "double" quotes')''')

    def test_long_pattern(self):
        # The repr of a very long pattern must stay short while keeping its
        # beginning and, when present, the trailing flags.
        pattern = 'Very %spattern' % ('long ' * 1000)
        head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], head)

        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")
1320
1321
class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        # Each input is paired with the table the private helper must
        # return for it.
        overlap = sre_compile._generate_overlap_table
        cases = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, table in cases:
            self.assertEqual(overlap(prefix), table)
1335
1336
def _group_or_marker(match, key):
    # Return match.group(key) as usable by the replacement expressions:
    # "None" for a group that did not participate, "Error" for a group that
    # does not exist.
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    # Special hack because else the string concat fails:
    return "None" if value is None else value


def run_re_tests():
    """Run the table-driven cases from test.re_tests, printing any failure.

    Each table entry is ``(pattern, string, outcome)`` or
    ``(pattern, string, outcome, repl, expected)``.  Problems are reported
    with print() rather than raised, so one bad entry does not stop the
    run; only an entry of the wrong length raises ValueError.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            # A syntax error is only a failure when it was not expected.
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception subclasses,
            # so they propagate with their original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no result to inspect for this entry; move on instead
            # of reading an unbound or stale value.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_or_marker(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_or_marker(result, name)
            # NOTE: eval() is applied to expressions taken from the
            # test.re_tests table; it must never be fed external input.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                bytes_result = bpat.search(bs)
                if bytes_result is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001463
Gregory P. Smith5a631832010-07-27 05:31:29 +00001464
def test_main():
    """Run this module's unittest cases, then the run_re_tests() checks."""
    run_unittest(__name__)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001468
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()