blob: fb573057d3461abd958e2485b4aeb42b069ea69c [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
Antoine Pitroud2cc7432014-02-03 20:59:59 +01002 cpython_only, captured_stdout
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Serhiy Storchaka4659cc02014-10-31 00:53:49 +02004import locale
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00006from re import Scanner
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02007import sre_compile
R David Murray26dfaac92013-04-14 13:00:54 -04008import sre_constants
Ezio Melottid2114eb2011-03-25 14:08:44 +02009import sys
10import string
11import traceback
Antoine Pitrou79aa68d2013-10-25 21:36:10 +020012import unittest
Raymond Hettinger027bb632004-05-31 03:09:25 +000013from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000014
Guido van Rossum23b22571997-07-17 22:36:14 +000015# Misc tests from Tim Peters' re.doc
16
Just van Rossum6802c6e2003-07-02 14:36:59 +000017# WARNING: Don't change details in these tests if you don't know
Ezio Melotti42da6632011-03-15 05:18:48 +020018# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000019# cover most of the code.
20
class S(str):
    """str subclass whose item access returns S instead of plain str."""

    def __getitem__(self, index):
        # Re-wrap whatever the base class hands back so the subtype is kept.
        piece = super().__getitem__(index)
        return S(piece)
24
class B(bytes):
    """bytes subclass whose item access wraps the base result in B."""

    def __getitem__(self, index):
        # The base result is passed straight to the B constructor.
        piece = super().__getitem__(index)
        return B(piece)
28
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000030
def assertTypedEqual(self, actual, expect, msg=None):
    """Assert that *actual* equals *expect* and that leaf types are identical.

    Tuples and lists in *expect* are walked element by element (pairing
    with *actual* via zip); every other value must have exactly the same
    type as its counterpart.
    """
    self.assertEqual(actual, expect, msg)
    # Explicit stack instead of recursion; pairs are pushed reversed so they
    # are visited depth-first, left to right.
    pending = [(actual, expect)]
    while pending:
        got, want = pending.pop()
        if not isinstance(want, (tuple, list)):
            self.assertIs(type(got), type(want), msg)
            continue
        pending.extend(reversed(list(zip(got, want))))
40
Benjamin Petersone48944b2012-03-07 14:50:25 -060041 def test_keep_buffer(self):
42 # See bug 14212
43 b = bytearray(b'x')
44 it = re.finditer(b'a', b)
45 with self.assertRaises(BufferError):
46 b.extend(b'x'*400)
47 list(it)
48 del it
49 gc_collect()
50 b.extend(b'x'*400)
51
Raymond Hettinger027bb632004-05-31 03:09:25 +000052 def test_weakref(self):
53 s = 'QabbbcR'
54 x = re.compile('ab+c')
55 y = proxy(x)
56 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
57
Skip Montanaro8ed06da2003-04-24 19:43:18 +000058 def test_search_star_plus(self):
59 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
60 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
61 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
62 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030063 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000064 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
65 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
66 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
67 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030068 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000069
def bump_num(self, matchobj):
    """Return the whole match, read as an int and incremented, as a str."""
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000073
def test_basic_re_sub(self):
    """Exercise re.sub() with str, bytes-like, callable and template arguments."""
    self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
    self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
    self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
    self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
    for y in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')

    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted literally; a template string
    # has its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Raw literals: '\g' is not a valid Python string escape, so the
    # templates are spelled as raw strings (the string values are unchanged).
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    # Raw literal for the same reason: '\s' is not a valid string escape.
    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +0000110
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000111 def test_bug_449964(self):
112 # fails for group followed by other escape
113 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
114 'xx\bxx\b')
115
116 def test_bug_449000(self):
117 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000118 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
119 'abc\ndef\n')
120 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
121 'abc\ndef\n')
122 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
123 'abc\ndef\n')
124 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
125 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +0000126
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000127 def test_bug_1661(self):
128 # Verify that flags do not get silently ignored with compiled patterns
129 pattern = re.compile('.')
130 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
131 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
132 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
133 self.assertRaises(ValueError, re.compile, pattern, re.I)
134
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000135 def test_bug_3629(self):
136 # A regex that triggered a bug in the sre-code validator
137 re.compile("(?P<quote>)(?(quote))")
138
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000139 def test_sub_template_numeric_escape(self):
140 # bug 776311 and friends
141 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
142 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
143 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
144 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
145 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
146 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
147 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
148
149 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
150 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
151
152 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
153 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
154 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
155 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
156 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
157
158 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
159 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000160
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000161 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
162 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
163 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
164 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
165 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
166 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
167 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
168 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
169 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
170 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
171 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
172 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
173
174 # in python2.3 (etc), these loop endlessly in sre_parser.py
175 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
176 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
177 'xz8')
178 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
179 'xza')
180
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000181 def test_qualified_re_sub(self):
182 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
183 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000184
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000185 def test_bug_114660(self):
186 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
187 'hello there')
188
189 def test_bug_462270(self):
190 # Test for empty sub() behaviour, see SF bug #462270
191 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
192 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
193
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200194 def test_symbolic_groups(self):
195 re.compile('(?P<a>x)(?P=a)(?(a)y)')
196 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
197 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
198 self.assertRaises(re.error, re.compile, '(?Px)')
199 self.assertRaises(re.error, re.compile, '(?P=)')
200 self.assertRaises(re.error, re.compile, '(?P=1)')
201 self.assertRaises(re.error, re.compile, '(?P=a)')
202 self.assertRaises(re.error, re.compile, '(?P=a1)')
203 self.assertRaises(re.error, re.compile, '(?P=a.)')
204 self.assertRaises(re.error, re.compile, '(?P<)')
205 self.assertRaises(re.error, re.compile, '(?P<>)')
206 self.assertRaises(re.error, re.compile, '(?P<1>)')
207 self.assertRaises(re.error, re.compile, '(?P<a.>)')
208 self.assertRaises(re.error, re.compile, '(?())')
209 self.assertRaises(re.error, re.compile, '(?(a))')
210 self.assertRaises(re.error, re.compile, '(?(1a))')
211 self.assertRaises(re.error, re.compile, '(?(a.))')
Georg Brandl1d472b72013-04-14 11:40:00 +0200212 # New valid/invalid identifiers in Python 3
213 re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
214 re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
215 self.assertRaises(re.error, re.compile, '(?P<©>x)')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200216
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000217 def test_symbolic_refs(self):
218 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
219 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
220 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
221 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200222 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000223 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
224 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
225 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
226 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000227 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Georg Brandl1d472b72013-04-14 11:40:00 +0200228 # New valid/invalid identifiers in Python 3
229 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
230 self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
231 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000232
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000233 def test_re_subn(self):
234 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
235 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
236 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
237 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
238 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000239
def test_re_split(self):
    """split() on str, str-subclass, bytes-like and non-ASCII subjects."""
    str_cases = (
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
    )
    for subject in ":a:b::c", S(":a:b::c"):
        for pattern, expected in str_cases:
            self.assertTypedEqual(re.split(pattern, subject), expected)

    bytes_cases = (
        (b":", [b'', b'a', b'b', b'', b'c']),
        (b":*", [b'', b'a', b'b', b'c']),
        (b"(:*)", [b'', b':', b'a', b':', b'b', b'::', b'c']),
    )
    for subject in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                    memoryview(b":a:b::c")):
        for pattern, expected in bytes_cases:
            self.assertTypedEqual(re.split(pattern, subject), expected)

    # Same splits with three characters substituted for 'a', 'b', 'c'.
    for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                    "\U0001d49c\U0001d49e\U0001d4b5"):
        subject = ":%s:%s::%s" % (a, b, c)
        self.assertEqual(re.split(":", subject), ['', a, b, '', c])
        self.assertEqual(re.split(":*", subject), ['', a, b, c])
        self.assertEqual(re.split("(:*)", subject),
                         ['', ':', a, ':', b, '::', c])

    grouped_cases = (
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    )
    for pattern, expected in grouped_cases:
        self.assertEqual(re.split(pattern, ":a:b::c"), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000274
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000275 def test_qualified_re_split(self):
276 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
277 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
278 self.assertEqual(re.split("(:)", ":a:b::c", 2),
279 ['', ':', 'a', ':', 'b::c'])
280 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
281 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000282
def test_re_findall(self):
    """findall() on str, str-subclass, bytes-like and non-ASCII subjects."""
    self.assertEqual(re.findall(":+", "abc"), [])

    str_cases = (
        (":+", [":", "::", ":::"]),
        ("(:+)", [":", "::", ":::"]),
        ("(:)(:*)", [(":", ""), (":", ":"), (":", "::")]),
    )
    for subject in "a:b::c:::d", S("a:b::c:::d"):
        for pattern, expected in str_cases:
            self.assertTypedEqual(re.findall(pattern, subject), expected)

    bytes_cases = (
        (b":+", [b":", b"::", b":::"]),
        (b"(:+)", [b":", b"::", b":::"]),
        (b"(:)(:*)", [(b":", b""), (b":", b":"), (b":", b"::")]),
    )
    for subject in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
                    memoryview(b"a:b::c:::d")):
        for pattern, expected in bytes_cases:
            self.assertTypedEqual(re.findall(pattern, subject), expected)

    # Same searches with a non-ASCII character standing in for ':'.
    for x in ("\xe0", "\u0430", "\U0001d49c"):
        double = x * 2
        triple = x * 3
        subject = "a%sb%sc%sd" % (x, double, triple)
        self.assertEqual(re.findall("%s+" % x, subject), [x, double, triple])
        self.assertEqual(re.findall("(%s+)" % x, subject), [x, double, triple])
        self.assertEqual(re.findall("(%s)(%s*)" % (x, x), subject),
                         [(x, ""), (x, x), (x, double)])
Guido van Rossum49946571997-07-18 04:26:25 +0000308
Skip Montanaro5ba00542003-04-25 16:00:14 +0000309 def test_bug_117612(self):
310 self.assertEqual(re.findall(r"(a|(b))", "aba"),
311 [("a", ""),("b", "b"),("a", "")])
312
def test_re_match(self):
    """match() results: groups(), group() by index and by name."""
    for string in 'a', S('a'):
        self.assertEqual(re.match('a', string).groups(), ())
        self.assertEqual(re.match('(a)', string).groups(), ('a',))
        self.assertEqual(re.match('(a)', string).group(0), 'a')
        self.assertEqual(re.match('(a)', string).group(1), 'a')
        self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
    for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
        self.assertEqual(re.match(b'a', string).groups(), ())
        self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
        self.assertEqual(re.match(b'(a)', string).group(0), b'a')
        self.assertEqual(re.match(b'(a)', string).group(1), b'a')
        self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
    for a in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.match(a, a).groups(), ())
        self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
        self.assertEqual(re.match('(%s)' % a, a).group(0), a)
        self.assertEqual(re.match('(%s)' % a, a).group(1), a)
        self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))

    pat = re.compile('((a)|(b))(c)?')
    self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
    self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
    self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
    self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
    self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

    # A single group
    m = re.match('(a)', 'a')
    # The first check uses the no-argument form; previously group(0) was
    # asserted twice in a row and group() was never covered.
    self.assertEqual(m.group(), 'a')
    self.assertEqual(m.group(0), 'a')
    self.assertEqual(m.group(1), 'a')
    self.assertEqual(m.group(1, 1), ('a', 'a'))

    pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                     (None, 'b', None))
    self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000352
def test_re_fullmatch(self):
    """fullmatch() must cover the whole subject (or the pos/endpos window)."""
    # Issue 16203: Proposal: add re.fullmatch() method.
    self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
    for subject in "ab", S("ab"):
        self.assertEqual(re.fullmatch(r"a|ab", subject).span(), (0, 2))
    for subject in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
        self.assertEqual(re.fullmatch(br"a|ab", subject).span(), (0, 2))
    for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
        alternation = r"%s|%s" % (a, a + b)
        self.assertEqual(re.fullmatch(alternation, a + b).span(), (0, 2))

    matching = (
        (r".*?$", "abc", (0, 3)),
        (r".*?", "abc", (0, 3)),
        (r"a.*?b", "ab", (0, 2)),
        (r"a.*?b", "abb", (0, 3)),
        (r"a.*?b", "axxb", (0, 4)),
    )
    for pattern, subject, expected in matching:
        self.assertEqual(re.fullmatch(pattern, subject).span(), expected)

    failing = (
        (r"a+", "ab"),
        (r"abc$", "abc\n"),
        (r"abc\Z", "abc\n"),
        (r"(?m)abc$", "abc\n"),
    )
    for pattern, subject in failing:
        self.assertIsNone(re.fullmatch(pattern, subject))

    lookaround = (
        (r"ab(?=c)cd", "abcd", (0, 4)),
        (r"ab(?<=b)cd", "abcd", (0, 4)),
        (r"(?=a|ab)ab", "ab", (0, 2)),
    )
    for pattern, subject, expected in lookaround:
        self.assertEqual(re.fullmatch(pattern, subject).span(), expected)

    for pattern in (r"bc", r".*?$", r".*?"):
        compiled = re.compile(pattern)
        self.assertEqual(
            compiled.fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
382
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000383 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000384 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
385 ('(', 'a'))
386 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
387 (None, 'a'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300388 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
389 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000390 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
391 ('a', 'b'))
392 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
393 (None, 'd'))
394 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
395 (None, 'd'))
396 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
397 ('a', ''))
398
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000399 # Tests for bug #1177831: exercise groups other than the first group
400 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
401 self.assertEqual(p.match('abc').groups(),
402 ('a', 'b', 'c'))
403 self.assertEqual(p.match('ad').groups(),
404 ('a', None, 'd'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300405 self.assertIsNone(p.match('abd'))
406 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000407
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000408
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000409 def test_re_groupref(self):
410 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
411 ('|', 'a'))
412 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
413 (None, 'a'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300414 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
415 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000416 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
417 ('a', 'a'))
418 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
419 (None, None))
420
421 def test_groupdict(self):
422 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
423 'first second').groupdict(),
424 {'first':'first', 'second':'second'})
425
426 def test_expand(self):
427 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
428 "first second")
429 .expand(r"\2 \1 \g<second> \g<first>"),
430 "second first second first")
431
432 def test_repeat_minmax(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300433 self.assertIsNone(re.match("^(\w){1}$", "abc"))
434 self.assertIsNone(re.match("^(\w){1}?$", "abc"))
435 self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
436 self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000437
438 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
439 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
440 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
441 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
442 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
443 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
444 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
445 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
446
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300447 self.assertIsNone(re.match("^x{1}$", "xxx"))
448 self.assertIsNone(re.match("^x{1}?$", "xxx"))
449 self.assertIsNone(re.match("^x{1,2}$", "xxx"))
450 self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000451
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300452 self.assertTrue(re.match("^x{3}$", "xxx"))
453 self.assertTrue(re.match("^x{1,3}$", "xxx"))
454 self.assertTrue(re.match("^x{1,4}$", "xxx"))
455 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
456 self.assertTrue(re.match("^x{3}?$", "xxx"))
457 self.assertTrue(re.match("^x{1,3}?$", "xxx"))
458 self.assertTrue(re.match("^x{1,4}?$", "xxx"))
459 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000460
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300461 self.assertIsNone(re.match("^x{}$", "xxx"))
462 self.assertTrue(re.match("^x{}$", "x{}"))
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000463
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000464 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000465 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000466 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000467 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
468 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
469 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
470 {'first': 1, 'other': 2})
471
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000472 self.assertEqual(re.match("(a)", "a").pos, 0)
473 self.assertEqual(re.match("(a)", "a").endpos, 1)
474 self.assertEqual(re.match("(a)", "a").string, "a")
475 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300476 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000477
478 def test_special_escapes(self):
479 self.assertEqual(re.search(r"\b(b.)\b",
480 "abcd abc bcd bx").group(1), "bx")
481 self.assertEqual(re.search(r"\B(b.)\B",
482 "abc bcd bc abxd").group(1), "bx")
483 self.assertEqual(re.search(r"\b(b.)\b",
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300484 "abcd abc bcd bx", re.ASCII).group(1), "bx")
485 self.assertEqual(re.search(r"\B(b.)\B",
486 "abc bcd bc abxd", re.ASCII).group(1), "bx")
487 self.assertEqual(re.search(r"\b(b.)\b",
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000488 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
489 self.assertEqual(re.search(r"\B(b.)\B",
490 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000491 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
492 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300493 self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300494 self.assertEqual(re.search(br"\b(b.)\b",
495 b"abcd abc bcd bx").group(1), b"bx")
496 self.assertEqual(re.search(br"\B(b.)\B",
497 b"abc bcd bc abxd").group(1), b"bx")
498 self.assertEqual(re.search(br"\b(b.)\b",
499 b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
500 self.assertEqual(re.search(br"\B(b.)\B",
501 b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
502 self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
503 self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300504 self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000505 self.assertEqual(re.search(r"\d\D\w\W\s\S",
506 "1aa! a").group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300507 self.assertEqual(re.search(br"\d\D\w\W\s\S",
508 b"1aa! a").group(0), b"1aa! a")
509 self.assertEqual(re.search(r"\d\D\w\W\s\S",
510 "1aa! a", re.ASCII).group(0), "1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000511 self.assertEqual(re.search(r"\d\D\w\W\s\S",
512 "1aa! a", re.LOCALE).group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300513 self.assertEqual(re.search(br"\d\D\w\W\s\S",
514 b"1aa! a", re.LOCALE).group(0), b"1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000515
Ezio Melotti5a045b92012-02-29 11:48:44 +0200516 def test_string_boundaries(self):
517 # See http://bugs.python.org/issue10713
518 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
519 "abc")
520 # There's a word boundary at the start of a string.
521 self.assertTrue(re.match(r"\b", "abc"))
522 # A non-empty string includes a non-boundary zero-length match.
523 self.assertTrue(re.search(r"\B", "abc"))
524 # There is no non-boundary match at the start of a string.
525 self.assertFalse(re.match(r"\B", "abc"))
526 # However, an empty string contains no word boundaries, and also no
527 # non-boundaries.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300528 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200529 # This one is questionable and different from the perlre behaviour,
530 # but describes current behavior.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300531 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200532 # A single word-character string has two boundaries, but no
533 # non-boundary gaps.
534 self.assertEqual(len(re.findall(r"\b", "a")), 2)
535 self.assertEqual(len(re.findall(r"\B", "a")), 0)
536 # If there are no words, there are no boundaries
537 self.assertEqual(len(re.findall(r"\b", " ")), 0)
538 self.assertEqual(len(re.findall(r"\b", " ")), 0)
539 # Can match around the whitespace.
540 self.assertEqual(len(re.findall(r"\B", " ")), 2)
541
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000542 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000543 self.assertEqual(re.match("([\u2222\u2223])",
544 "\u2222").group(1), "\u2222")
Serhiy Storchakabe80fc92013-10-24 22:02:58 +0300545 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300546 self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000547
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100548 def test_big_codesize(self):
549 # Issue #1160
550 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300551 self.assertTrue(r.match('1000'))
552 self.assertTrue(r.match('9999'))
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100553
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000554 def test_anyall(self):
555 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
556 "a\nb")
557 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
558 "a\n\nb")
559
560 def test_non_consuming(self):
561 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
562 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
563 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
564 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
565 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
566 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
567 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
568
569 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
570 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
571 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
572 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
573
574 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000575 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300576 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000577 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
578 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
579 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
580 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
581 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
582 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
583 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
584 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
585
586 def test_category(self):
587 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
588
def test_getlower(self):
    # Calls the private _sre.getlower() helper directly and checks that it
    # maps ord('A') to ord('a') with no flag, with LOCALE and with UNICODE.
    import _sre
    self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

    # The same case folding observed through the public API, for str and
    # for bytes patterns.
    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
    self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000597
598 def test_not_literal(self):
599 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
600 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
601
602 def test_search_coverage(self):
603 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
604 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
605
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds with the given matched
    # text and span.  When neither `match` nor `span` is supplied, the
    # pattern is expected to match the whole of `text`; supplying only one
    # of the two is a usage error.
    match_given = match is not None
    span_given = span is not None
    if match_given != span_given:
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if not match_given:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000619
def test_re_escape(self):
    # re.escape() on each of the first 256 code points: ASCII letters,
    # digits and '_' come back unchanged, NUL becomes '\000', anything else
    # gets a backslash prefix.  Every escaped form must match its source.
    unescaped = string.ascii_letters + string.digits + '_'
    everything = ''.join(map(chr, range(256)))
    for ch in everything:
        if ch in unescaped:
            expected = ch
        elif ch == '\x00':
            expected = '\\000'
        else:
            expected = '\\' + ch
        self.assertEqual(re.escape(ch), expected)
        self.assertMatch(re.escape(ch), ch)
    self.assertMatch(re.escape(everything), everything)
Guido van Rossum49946571997-07-18 04:26:25 +0000632
def test_re_escape_byte(self):
    # Bytes counterpart of the str escape test: ASCII alphanumerics and
    # b'_' are unchanged, NUL becomes b'\000', every other byte gets a
    # backslash prefix.  Every escaped form must match its source.
    unescaped = (string.ascii_letters + string.digits + '_').encode('ascii')
    everything = bytes(range(256))
    for value in everything:
        single = bytes([value])
        if single in unescaped:
            expected = single
        elif value == 0:
            expected = b'\\000'
        else:
            expected = b'\\' + single
        self.assertEqual(re.escape(single), expected)
        self.assertMatch(re.escape(single), single)
    self.assertMatch(re.escape(everything), everything)
Guido van Rossum698280d2008-09-10 17:44:35 +0000646
def test_re_escape_non_ascii(self):
    # Each non-ASCII character is expected to be backslash-escaped, and
    # the escaped text must still match the original.
    text = 'xxx\u2620\u2620\u2620xxx'
    escaped = re.escape(text)
    self.assertEqual(escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(escaped, text)
    # An escaped character can be repeated with '+' inside a larger pattern.
    repeated = '.%s+.' % re.escape('\u2620')
    self.assertMatch(repeated, text, match='x\u2620\u2620\u2620x',
                     span=(2, 7), matcher=re.search)
654
def test_re_escape_non_ascii_bytes(self):
    # Every byte of a UTF-8 encoded non-ASCII character is expected to be
    # backslash-escaped individually.
    data = 'y\u2620y\u2620y'.encode('utf-8')
    escaped = re.escape(data)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, data)
    needle = re.escape('\u2620'.encode('utf-8'))
    hits = re.findall(needle, data)
    self.assertEqual(len(hits), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000662
Serhiy Storchakab85a9762014-09-15 11:33:19 +0300663 def test_pickling(self):
664 import pickle
665 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
666 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
667 pickled = pickle.dumps(oldpat, proto)
668 newpat = pickle.loads(pickled)
669 self.assertEqual(newpat, oldpat)
670 # current pickle expects the _compile() reconstructor in re module
671 from re import _compile
Guido van Rossum23b22571997-07-17 22:36:14 +0000672
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000673 def test_constants(self):
674 self.assertEqual(re.I, re.IGNORECASE)
675 self.assertEqual(re.L, re.LOCALE)
676 self.assertEqual(re.M, re.MULTILINE)
677 self.assertEqual(re.S, re.DOTALL)
678 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000679
def test_flags(self):
    # Each listed flag, passed on its own, must be accepted by
    # re.compile() for a str pattern and yield a truthy pattern object.
    for flag in [re.I, re.M, re.X, re.S, re.L]:
        self.assertTrue(re.compile('^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +0000683
def test_sre_character_literals(self):
    # Numeric character escapes in str patterns, outside character sets.
    # For each sample code point the escape is tried on its own and
    # followed by one extra literal character.
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        if i < 256:
            # Three-digit octal and two-digit \x escapes are only built
            # for code points below 256.
            self.assertTrue(re.match(r"\%03o" % i, chr(i)))
            self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
            self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
            self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
        if i < 0x10000:
            # Four-digit \u escapes are only built for code points that
            # fit in four hex digits.
            self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
            self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
        # Eight-digit \U escapes are built for every sample.
        self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
        self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
        self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
    # Short octal forms beginning with \0, alone and before a digit that
    # is expected to stay a literal.
    self.assertTrue(re.match(r"\0", "\000"))
    self.assertTrue(re.match(r"\08", "\0008"))
    self.assertTrue(re.match(r"\01", "\001"))
    self.assertTrue(re.match(r"\018", "\0018"))
    # \567 is expected to match chr(0o167).
    self.assertTrue(re.match(r"\567", chr(0o167)))
    # Malformed escapes must be rejected with re.error.
    self.assertRaises(re.error, re.match, r"\911", "")
    self.assertRaises(re.error, re.match, r"\x1", "")
    self.assertRaises(re.error, re.match, r"\x1z", "")
    self.assertRaises(re.error, re.match, r"\u123", "")
    self.assertRaises(re.error, re.match, r"\u123z", "")
    self.assertRaises(re.error, re.match, r"\U0001234", "")
    self.assertRaises(re.error, re.match, r"\U0001234z", "")
    self.assertRaises(re.error, re.match, r"\U00110000", "")
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000713
def test_sre_character_class_literals(self):
    # Numeric character escapes in str patterns, inside character sets.
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        if i < 256:
            # Octal (unpadded and three-digit) and two-digit \x escapes
            # are only built for code points below 256.
            self.assertTrue(re.match(r"[\%o]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%o8]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
        if i < 0x10000:
            # Four-digit \u escapes are only built for code points that
            # fit in four hex digits.
            self.assertTrue(re.match(r"[\u%04x]" % i, chr(i)))
            self.assertTrue(re.match(r"[\u%04x0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\u%04xz]" % i, chr(i)))
        # Eight-digit \U escapes are built for every sample.
        self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
        self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
        self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
    # \U escapes used as the endpoints of a range.
    self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
    # Malformed escapes inside a set must be rejected with re.error.
    self.assertRaises(re.error, re.match, r"[\911]", "")
    self.assertRaises(re.error, re.match, r"[\x1z]", "")
    self.assertRaises(re.error, re.match, r"[\u123z]", "")
    self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
    self.assertRaises(re.error, re.match, r"[\U00110000]", "")
738
def test_sre_byte_literals(self):
    # Numeric character escapes in bytes patterns, outside character sets.
    # Each pattern is built as str and then encoded to bytes.
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        self.assertTrue(re.match((r"\%03o" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
        self.assertTrue(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
        self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
        self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
    # In a bytes pattern \u and \U are expected to match the plain
    # letters b'u' and b'U'.
    self.assertTrue(re.match(br"\u", b'u'))
    self.assertTrue(re.match(br"\U", b'U'))
    # Short octal forms beginning with \0, alone and before a digit that
    # is expected to stay a literal.
    self.assertTrue(re.match(br"\0", b"\000"))
    self.assertTrue(re.match(br"\08", b"\0008"))
    self.assertTrue(re.match(br"\01", b"\001"))
    self.assertTrue(re.match(br"\018", b"\0018"))
    # \567 is expected to match the single byte 0o167.
    self.assertTrue(re.match(br"\567", bytes([0o167])))
    # Malformed escapes must be rejected with re.error.
    self.assertRaises(re.error, re.match, br"\911", b"")
    self.assertRaises(re.error, re.match, br"\x1", b"")
    self.assertRaises(re.error, re.match, br"\x1z", b"")
757
def test_sre_byte_class_literals(self):
    # Numeric character escapes in bytes patterns, inside character sets.
    # Each pattern is built as str and then encoded to bytes.
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        self.assertTrue(re.match((r"[\%o]" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"[\%o8]" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"[\%03o]" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
    # Inside a bytes set \u and \U are expected to match the plain
    # letters b'u' and b'U'.
    self.assertTrue(re.match(br"[\u]", b'u'))
    self.assertTrue(re.match(br"[\U]", b'U'))
    # Malformed escapes inside a set must be rejected with re.error.
    self.assertRaises(re.error, re.match, br"[\911]", b"")
    self.assertRaises(re.error, re.match, br"[\x1z]", b"")
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000772
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000773 def test_bug_113254(self):
774 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
775 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
776 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
777
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000778 def test_bug_527371(self):
779 # bug described in patches 527371/672491
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300780 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000781 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
782 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
783 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
784 self.assertEqual(re.match("((a))", "a").lastindex, 1)
785
786 def test_bug_545855(self):
787 # bug 545855 -- This pattern failed to cause a compile error as it
788 # should, instead provoking a TypeError.
789 self.assertRaises(re.error, re.compile, 'foo[a-')
790
791 def test_bug_418626(self):
792 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
793 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
794 # pattern '*?' on a long string.
795 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
796 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
797 20003)
798 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000799 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000800 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000801 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000802
803 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000804 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000805 self.assertEqual(re.compile(pat) and 1, 1)
806
Skip Montanaro1e703c62003-04-25 15:40:28 +0000807 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000808 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000809 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000810 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
811 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
812 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000813
Serhiy Storchakafa468162013-02-16 21:23:53 +0200814 def test_unlimited_zero_width_repeat(self):
815 # Issue #9669
816 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
817 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
818 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
819 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
820 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
821 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
822
Skip Montanaro1e703c62003-04-25 15:40:28 +0000823 def test_scanner(self):
824 def s_ident(scanner, token): return token
825 def s_operator(scanner, token): return "op%s" % token
826 def s_float(scanner, token): return float(token)
827 def s_int(scanner, token): return int(token)
828
829 scanner = Scanner([
830 (r"[a-zA-Z_]\w*", s_ident),
831 (r"\d+\.\d*", s_float),
832 (r"\d+", s_int),
833 (r"=|\+|-|\*|/", s_operator),
834 (r"\s+", None),
835 ])
836
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300837 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000838
Skip Montanaro1e703c62003-04-25 15:40:28 +0000839 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
840 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
841 'op+', 'bar'], ''))
842
Skip Montanaro5ba00542003-04-25 16:00:14 +0000843 def test_bug_448951(self):
844 # bug 448951 (similar to 429357, but with single char match)
845 # (Also test greedy matches.)
846 for op in '','?','*':
847 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
848 (None, None))
849 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
850 ('a:', 'a'))
851
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000852 def test_bug_725106(self):
853 # capturing groups in alternatives in repeats
854 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
855 ('b', 'a'))
856 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
857 ('c', 'b'))
858 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
859 ('b', None))
860 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
861 ('b', None))
862 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
863 ('b', 'a'))
864 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
865 ('c', 'b'))
866 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
867 ('b', None))
868 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
869 ('b', None))
870
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000871 def test_bug_725149(self):
872 # mark_stack_base restoring before restoring marks
873 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
874 ('a', None))
875 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
876 ('a', None, None))
877
Just van Rossum12723ba2003-07-02 20:03:04 +0000878 def test_bug_764548(self):
879 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000880 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000881 pat = re.compile(my_unicode("abc"))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300882 self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +0000883
Skip Montanaro5ba00542003-04-25 16:00:14 +0000884 def test_finditer(self):
885 iter = re.finditer(r":+", "a:b::c:::d")
886 self.assertEqual([item.group(0) for item in iter],
887 [":", "::", ":::"])
888
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600889 pat = re.compile(r":+")
890 iter = pat.finditer("a:b::c:::d", 1, 10)
891 self.assertEqual([item.group(0) for item in iter],
892 [":", "::", ":::"])
893
894 pat = re.compile(r":+")
895 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
896 self.assertEqual([item.group(0) for item in iter],
897 [":", "::", ":::"])
898
899 pat = re.compile(r":+")
900 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
901 self.assertEqual([item.group(0) for item in iter],
902 [":", "::", ":::"])
903
904 pat = re.compile(r":+")
905 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
906 self.assertEqual([item.group(0) for item in iter],
907 ["::", "::"])
908
Thomas Wouters40a088d2008-03-18 20:19:54 +0000909 def test_bug_926075(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300910 self.assertIsNot(re.compile('bug_926075'),
911 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000912
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000913 def test_bug_931848(self):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300914 pattern = "[\u002E\u3002\uFF0E\uFF61]"
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000915 self.assertEqual(re.compile(pattern).split("a.b.c"),
916 ['a','b','c'])
917
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000918 def test_bug_581080(self):
919 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000920 self.assertEqual(next(iter).span(), (1,2))
921 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000922
923 scanner = re.compile(r"\s").scanner("a b")
924 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300925 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000926
927 def test_bug_817234(self):
928 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000929 self.assertEqual(next(iter).span(), (0, 4))
930 self.assertEqual(next(iter).span(), (4, 4))
931 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000932
Mark Dickinson1f268282009-07-28 17:22:36 +0000933 def test_bug_6561(self):
934 # '\d' should match characters in Unicode category 'Nd'
935 # (Number, Decimal Digit), but not those in 'Nl' (Number,
936 # Letter) or 'No' (Number, Other).
937 decimal_digits = [
938 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
939 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
940 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
941 ]
942 for x in decimal_digits:
943 self.assertEqual(re.match('^\d$', x).group(0), x)
944
945 not_decimal_digits = [
946 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
947 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
948 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
949 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
950 ]
951 for x in not_decimal_digits:
952 self.assertIsNone(re.match('^\d$', x))
953
Guido van Rossumd8faa362007-04-27 19:54:29 +0000954 def test_empty_array(self):
955 # SF buf 1647541
956 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000957 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000958 a = array.array(typecode)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300959 self.assertIsNone(re.compile(b"bla").match(a))
Antoine Pitroufd036452008-08-19 17:56:33 +0000960 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000961
Christian Heimes072c0f12008-01-03 23:01:04 +0000962 def test_inline_flags(self):
963 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000964 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
965 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000966
967 p = re.compile(upper_char, re.I | re.U)
968 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300969 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000970
971 p = re.compile(lower_char, re.I | re.U)
972 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300973 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000974
975 p = re.compile('(?i)' + upper_char, re.U)
976 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300977 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000978
979 p = re.compile('(?i)' + lower_char, re.U)
980 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300981 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000982
983 p = re.compile('(?iu)' + upper_char)
984 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300985 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000986
987 p = re.compile('(?iu)' + lower_char)
988 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300989 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +0000990
Christian Heimes25bb7832008-01-11 16:17:00 +0000991 def test_dollar_matches_twice(self):
992 "$ matches the end of string, and just before the terminating \n"
993 pattern = re.compile('$')
994 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
995 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
996 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
997
998 pattern = re.compile('$', re.MULTILINE)
999 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
1000 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
1001 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1002
Antoine Pitroufd036452008-08-19 17:56:33 +00001003 def test_bytes_str_mixing(self):
1004 # Mixing str and bytes is disallowed
1005 pat = re.compile('.')
1006 bpat = re.compile(b'.')
1007 self.assertRaises(TypeError, pat.match, b'b')
1008 self.assertRaises(TypeError, bpat.match, 'b')
1009 self.assertRaises(TypeError, pat.sub, b'b', 'c')
1010 self.assertRaises(TypeError, pat.sub, 'b', b'c')
1011 self.assertRaises(TypeError, pat.sub, b'b', b'c')
1012 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
1013 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
1014 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
1015
1016 def test_ascii_and_unicode_flag(self):
1017 # String patterns
1018 for flags in (0, re.UNICODE):
1019 pat = re.compile('\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001020 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001021 pat = re.compile('\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001022 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001023 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001024 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001025 pat = re.compile('(?a)\xc0', re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001026 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001027 pat = re.compile('\w', re.ASCII)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001028 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001029 pat = re.compile('(?a)\w')
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001030 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001031 # Bytes patterns
1032 for flags in (0, re.ASCII):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001033 pat = re.compile(b'\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001034 self.assertIsNone(pat.match(b'\xe0'))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001035 pat = re.compile(b'\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001036 self.assertIsNone(pat.match(b'\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001037 # Incompatibilities
1038 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
1039 self.assertRaises(ValueError, re.compile, b'(?u)\w')
1040 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
1041 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
1042 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
1043 self.assertRaises(ValueError, re.compile, '(?au)\w')
1044
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001045 def test_bug_6509(self):
1046 # Replacement strings of both types must parse properly.
1047 # all strings
1048 pat = re.compile('a(\w)')
1049 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
1050 pat = re.compile('a(.)')
1051 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
1052 pat = re.compile('..')
1053 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
1054
1055 # all bytes
1056 pat = re.compile(b'a(\w)')
1057 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
1058 pat = re.compile(b'a(.)')
1059 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
1060 pat = re.compile(b'..')
1061 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
1062
def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    # Each call below passes a bad argument and must fail with a regular
    # Python exception.
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both. For smaller values
    # a RuntimeError is raised instead of OverflowError.
    long_overflow = 2**128
    # A dict is not a valid subject string.
    self.assertRaises(TypeError, re.finditer, "a", {})
    # The third argument here is a list holding the oversized integer.
    self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
    # A dict is not a valid first argument either.
    self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +00001074
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001075 def test_search_dot_unicode(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001076 self.assertTrue(re.search("123.*-", '123abc-'))
1077 self.assertTrue(re.search("123.*-", '123\xe9-'))
1078 self.assertTrue(re.search("123.*-", '123\u20ac-'))
1079 self.assertTrue(re.search("123.*-", '123\U0010ffff-'))
1080 self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001081
def test_compile(self):
    # Test return value when given string and pattern as parameter
    pattern = re.compile('random pattern')
    self.assertIsInstance(pattern, re._pattern_type)
    # Compiling an already compiled pattern must hand back that very
    # object, not a copy.
    same_pattern = re.compile(pattern)
    self.assertIsInstance(same_pattern, re._pattern_type)
    self.assertIs(same_pattern, pattern)
    # Test behaviour when not given a string or pattern as parameter
    self.assertRaises(TypeError, re.compile, 0)
1091
def test_bug_13899(self):
    # Issue #13899: re pattern r"[\A]" should work like "A" but matches
    # nothing. Ditto B and Z.
    # The subject is a non-raw string, so its '\b' is a backspace
    # character; the test expects r'\b' inside the set to match it.
    self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
                     ['A', 'B', '\b', 'C', 'Z'])
1097
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # '$' can only match at the very end, so start and end must both
    # equal the full length of the subject.
    haystack = 'a' * size
    found = re.search('$', haystack)
    self.assertIsNotNone(found)
    self.assertEqual(found.start(), size)
    self.assertEqual(found.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001106
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # The empty pattern matches at every one of the size + 1 positions;
    # replacing with '' leaves the text unchanged.
    subject = 'a' * size
    replaced, count = re.subn('', '', subject)
    self.assertEqual(replaced, subject)
    self.assertEqual(count, size + 1)
1116
Serhiy Storchakac1b59d42012-12-29 23:38:48 +02001117 def test_bug_16688(self):
1118 # Issue 16688: Backreferences make case-insensitive regex fail on
1119 # non-ASCII strings.
1120 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1121 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001122
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001123 def test_repeat_minmax_overflow(self):
1124 # Issue #13169
1125 string = "x" * 100000
1126 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1127 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1128 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1129 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1130 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1131 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1132 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1133 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1134 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1135 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1136 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1137
1138 @cpython_only
1139 def test_repeat_minmax_overflow_maxrepeat(self):
1140 try:
1141 from _sre import MAXREPEAT
1142 except ImportError:
1143 self.skipTest('requires _sre.MAXREPEAT constant')
1144 string = "x" * 100000
1145 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1146 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1147 (0, 100000))
1148 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1149 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1150 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1151 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1152
R David Murray26dfaac92013-04-14 13:00:54 -04001153 def test_backref_group_name_in_exception(self):
1154 # Issue 17341: Poor error message when compiling invalid regex
1155 with self.assertRaisesRegex(sre_constants.error, '<foo>'):
1156 re.compile('(?P=<foo>)')
1157
1158 def test_group_name_in_exception(self):
1159 # Issue 17341: Poor error message when compiling invalid regex
1160 with self.assertRaisesRegex(sre_constants.error, '\?foo'):
1161 re.compile('(?P<?foo>)')
1162
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001163 def test_issue17998(self):
1164 for reps in '*', '+', '?', '{1}':
1165 for mod in '', '?':
1166 pattern = '.' + reps + mod + 'yz'
1167 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1168 ['xyz'], msg=pattern)
1169 pattern = pattern.encode()
1170 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1171 [b'xyz'], msg=pattern)
1172
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03001173 def test_match_repr(self):
1174 for string in '[abracadabra]', S('[abracadabra]'):
1175 m = re.search(r'(.+)(.*?)\1', string)
1176 self.assertEqual(repr(m), "<%s.%s object; "
1177 "span=(1, 12), match='abracadabra'>" %
1178 (type(m).__module__, type(m).__qualname__))
1179 for string in (b'[abracadabra]', B(b'[abracadabra]'),
1180 bytearray(b'[abracadabra]'),
1181 memoryview(b'[abracadabra]')):
1182 m = re.search(rb'(.+)(.*?)\1', string)
1183 self.assertEqual(repr(m), "<%s.%s object; "
1184 "span=(1, 12), match=b'abracadabra'>" %
1185 (type(m).__module__, type(m).__qualname__))
1186
1187 first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
1188 self.assertEqual(repr(first), "<%s.%s object; "
1189 "span=(0, 2), match='aa'>" %
1190 (type(second).__module__, type(first).__qualname__))
1191 self.assertEqual(repr(second), "<%s.%s object; "
1192 "span=(3, 5), match='bb'>" %
1193 (type(second).__module__, type(second).__qualname__))
1194
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001195
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001196 def test_bug_2537(self):
1197 # issue 2537: empty submatches
1198 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1199 for inner_op in ('{0,}', '*', '?'):
1200 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1201 m = r.match("xyyzy")
1202 self.assertEqual(m.group(0), "xyy")
1203 self.assertEqual(m.group(1), "")
1204 self.assertEqual(m.group(2), "y")
1205
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001206 def test_debug_flag(self):
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001207 pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001208 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001209 re.compile(pat, re.DEBUG)
1210 dump = '''\
1211subpattern 1
1212 literal 46
1213subpattern None
1214 branch
1215 in
1216 literal 99
1217 literal 104
1218 or
1219 literal 112
1220 literal 121
1221subpattern None
1222 groupref_exists 1
1223 at at_end
1224 else
1225 literal 58
1226 literal 32
1227'''
1228 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001229 # Debug output is output again even a second time (bypassing
1230 # the cache -- issue #20426).
1231 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001232 re.compile(pat, re.DEBUG)
1233 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001234
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001235 def test_keyword_parameters(self):
1236 # Issue #20283: Accepting the string keyword parameter.
1237 pat = re.compile(r'(ab)')
1238 self.assertEqual(
1239 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1240 self.assertEqual(
Serhiy Storchakaa537eb42014-03-06 11:36:15 +02001241 pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
1242 self.assertEqual(
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001243 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1244 self.assertEqual(
1245 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1246 self.assertEqual(
1247 pat.split(string='abracadabra', maxsplit=1),
1248 ['', 'ab', 'racadabra'])
1249 self.assertEqual(
1250 pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
1251 (7, 9))
1252
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001253 def test_bug_20998(self):
1254 # Issue #20998: Fullmatch of repeated single character pattern
1255 # with ignore case.
1256 self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
1257
Serhiy Storchaka4659cc02014-10-31 00:53:49 +02001258 def test_locale_caching(self):
1259 # Issue #22410
1260 oldlocale = locale.setlocale(locale.LC_CTYPE)
1261 self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
1262 for loc in 'en_US.iso88591', 'en_US.utf8':
1263 try:
1264 locale.setlocale(locale.LC_CTYPE, loc)
1265 except locale.Error:
1266 # Unsupported locale on this system
1267 self.skipTest('test needs %s locale' % loc)
1268
1269 re.purge()
1270 self.check_en_US_iso88591()
1271 self.check_en_US_utf8()
1272 re.purge()
1273 self.check_en_US_utf8()
1274 self.check_en_US_iso88591()
1275
1276 def check_en_US_iso88591(self):
1277 locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
1278 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1279 self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
1280 self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
1281 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1282 self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
1283 self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
1284
1285 def check_en_US_utf8(self):
1286 locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
1287 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1288 self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
1289 self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
1290 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1291 self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
1292 self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
1293
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02001294
class PatternReprTests(unittest.TestCase):
    """Checks on the text produced by repr() of compiled patterns."""

    def check(self, pattern, expected):
        # Compile without flags and compare the repr with `expected`.
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        # Compile with `flags` and compare the repr with `expected`.
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        self.check('random pattern', "re.compile('random pattern')")

    def test_single_flag(self):
        wanted = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, wanted)

    def test_multiple_flags(self):
        wanted = ("re.compile('random pattern', "
                  "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', re.I|re.S|re.X, wanted)

    def test_unicode_flag(self):
        # re.U is not shown in the repr of a str pattern.
        self.check_flags('random pattern', re.U,
                         "re.compile('random pattern')")
        wanted = "re.compile('random pattern', re.IGNORECASE|re.DOTALL)"
        self.check_flags('random pattern', re.I|re.S|re.U, wanted)

    def test_inline_flags(self):
        # A flag set inside the pattern is reported like an explicit one.
        self.check('(?i)pattern', "re.compile('(?i)pattern', re.IGNORECASE)")

    def test_unknown_flags(self):
        # Bits without a name are rendered in hexadecimal.
        self.check_flags('random pattern', 0x123000,
                         "re.compile('random pattern', 0x123000)")
        wanted = "re.compile('random pattern', re.IGNORECASE|0x123000)"
        self.check_flags('random pattern', 0x123000|re.I, wanted)

    def test_bytes(self):
        self.check(b'bytes pattern', "re.compile(b'bytes pattern')")
        self.check_flags(b'bytes pattern', re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_quotes(self):
        cases = (
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        )
        for pattern, wanted in cases:
            self.check(pattern, wanted)

    def test_long_pattern(self):
        # The repr of a very long pattern is truncated, but keeps its
        # beginning and, when present, the trailing flags.
        pattern = 'Very %spattern' % ('long ' * 1000)
        head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], head)

        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")
1355
1356
class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        # Each input string is paired with the table expected from
        # sre_compile._generate_overlap_table for it.
        build = sre_compile._generate_overlap_table
        cases = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, table in cases:
            self.assertEqual(build(prefix), table)
1370
1371
def _group_or_marker(match, key):
    # Return match.group(key) as text usable in the replacement
    # expressions: None becomes "None" (so string concatenation works)
    # and a group that does not exist becomes "Error".
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    return "None" if value is None else value


def run_re_tests():
    """Run the table-driven regression cases from test.re_tests.

    Each table entry is a 3-tuple (pattern, string, outcome) or a 5-tuple
    (pattern, string, outcome, repl, expected).  Problems are reported by
    printing a '===' or '***' line to stdout rather than by raising, so
    one bad entry does not stop the run.

    Raises:
        ValueError: if a table entry has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt is not an Exception subclass, so it
            # propagates on its own with its traceback intact.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no result for this entry; checking one left over
            # from a previous entry would report nonsense.
            continue

        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            # Typically SYNTAX_ERROR: this should have been a syntax
            # error; forget it.
            continue

        if result is None:
            print('=== Failed incorrectly', t)
        else:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_or_marker(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_or_marker(result, name)
            # NOTE: eval() is acceptable only because repl comes from the
            # in-tree test table; never feed it untrusted input.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                if bpat.search(bs) is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001498
Gregory P. Smith5a631832010-07-27 05:31:29 +00001499
def test_main():
    """Entry point: pass this module's name to run_unittest(), then call
    run_re_tests() for the table-driven cases."""
    run_unittest(__name__)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001503
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()