blob: a6cbbd0b67abf8868259d4d78d383a14c1717fc8 [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
Antoine Pitroud2cc7432014-02-03 20:59:59 +01002 cpython_only, captured_stdout
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Serhiy Storchaka4659cc02014-10-31 00:53:49 +02004import locale
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02006import sre_compile
Ezio Melottid2114eb2011-03-25 14:08:44 +02007import string
Victor Stinnerb44fb122016-11-21 16:35:08 +01008import sys
Ezio Melottid2114eb2011-03-25 14:08:44 +02009import traceback
Antoine Pitrou79aa68d2013-10-25 21:36:10 +020010import unittest
Victor Stinnerb44fb122016-11-21 16:35:08 +010011import warnings
12from re import Scanner
Raymond Hettinger027bb632004-05-31 03:09:25 +000013from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000014
# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
20
class S(str):
    """str subclass whose indexing and slicing return S instances."""

    def __getitem__(self, index):
        item = super().__getitem__(index)
        return S(item)
24
class B(bytes):
    """bytes subclass whose indexing and slicing return B instances."""

    def __getitem__(self, index):
        item = super().__getitem__(index)
        return B(item)
28
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000030
Serhiy Storchaka25324972013-10-16 12:46:28 +030031 def assertTypedEqual(self, actual, expect, msg=None):
32 self.assertEqual(actual, expect, msg)
33 def recurse(actual, expect):
34 if isinstance(expect, (tuple, list)):
35 for x, y in zip(actual, expect):
36 recurse(x, y)
37 else:
38 self.assertIs(type(actual), type(expect), msg)
39 recurse(actual, expect)
40
Serhiy Storchaka632a77e2015-03-25 21:03:47 +020041 def checkPatternError(self, pattern, errmsg, pos=None):
42 with self.assertRaises(re.error) as cm:
43 re.compile(pattern)
44 with self.subTest(pattern=pattern):
45 err = cm.exception
46 self.assertEqual(err.msg, errmsg)
47 if pos is not None:
48 self.assertEqual(err.pos, pos)
49
50 def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
51 with self.assertRaises(re.error) as cm:
52 re.sub(pattern, repl, string)
53 with self.subTest(pattern=pattern, repl=repl):
54 err = cm.exception
55 self.assertEqual(err.msg, errmsg)
56 if pos is not None:
57 self.assertEqual(err.pos, pos)
58
Benjamin Petersone48944b2012-03-07 14:50:25 -060059 def test_keep_buffer(self):
60 # See bug 14212
61 b = bytearray(b'x')
62 it = re.finditer(b'a', b)
63 with self.assertRaises(BufferError):
64 b.extend(b'x'*400)
65 list(it)
66 del it
67 gc_collect()
68 b.extend(b'x'*400)
69
Raymond Hettinger027bb632004-05-31 03:09:25 +000070 def test_weakref(self):
71 s = 'QabbbcR'
72 x = re.compile('ab+c')
73 y = proxy(x)
74 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
75
Skip Montanaro8ed06da2003-04-24 19:43:18 +000076 def test_search_star_plus(self):
77 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
78 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
79 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
80 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030081 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000082 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
83 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
84 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
85 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030086 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000087
Skip Montanaro8ed06da2003-04-24 19:43:18 +000088 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000089 int_value = int(matchobj.group(0))
90 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000091
Skip Montanaro8ed06da2003-04-24 19:43:18 +000092 def test_basic_re_sub(self):
Serhiy Storchaka25324972013-10-16 12:46:28 +030093 self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
94 self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
95 self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
96 self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
97 self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
98 self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030099 for y in ("\xe0", "\u0430", "\U0001d49c"):
100 self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
Serhiy Storchaka25324972013-10-16 12:46:28 +0300101
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000102 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
103 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
104 '9.3 -3 24x100y')
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300105 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
106 '9.3 -3 23x99y')
Victor Stinner55e614a2014-10-29 16:58:59 +0100107 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000108 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000109
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000110 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
111 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +0000112
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000113 s = r"\1\1"
114 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
115 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
116 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +0000117
R David Murray44b548d2016-09-08 13:59:53 -0400118 self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
119 self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
120 self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
121 self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +0000122
Serhiy Storchakaa54aae02015-03-24 22:58:14 +0200123 self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
124 self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
125 self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
126 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
127 for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
128 with self.subTest(c):
Serhiy Storchaka53c53ea2016-12-06 19:15:29 +0200129 with self.assertWarns(DeprecationWarning):
Serhiy Storchakaa54aae02015-03-24 22:58:14 +0200130 self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
Guido van Rossum95e80531997-08-13 22:34:14 +0000131
R David Murray44b548d2016-09-08 13:59:53 -0400132 self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +0000133
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000134 def test_bug_449964(self):
135 # fails for group followed by other escape
R David Murray44b548d2016-09-08 13:59:53 -0400136 self.assertEqual(re.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000137 'xx\bxx\b')
138
139 def test_bug_449000(self):
140 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000141 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
142 'abc\ndef\n')
143 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
144 'abc\ndef\n')
145 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
146 'abc\ndef\n')
147 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
148 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +0000149
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000150 def test_bug_1661(self):
151 # Verify that flags do not get silently ignored with compiled patterns
152 pattern = re.compile('.')
153 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
154 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
155 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
156 self.assertRaises(ValueError, re.compile, pattern, re.I)
157
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000158 def test_bug_3629(self):
159 # A regex that triggered a bug in the sre-code validator
160 re.compile("(?P<quote>)(?(quote))")
161
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000162 def test_sub_template_numeric_escape(self):
163 # bug 776311 and friends
164 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
165 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
166 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
167 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
168 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
169 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
170 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200171 self.assertEqual(re.sub('x', r'\377', 'x'), '\377')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000172
173 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
174 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
175
176 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
177 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
178 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
179 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
180 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
181
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200182 self.checkTemplateError('x', r'\400', 'x',
183 r'octal escape value \400 outside of '
184 r'range 0-0o377', 0)
185 self.checkTemplateError('x', r'\777', 'x',
186 r'octal escape value \777 outside of '
187 r'range 0-0o377', 0)
Tim Peters0e9980f2004-09-12 03:49:31 +0000188
Serhiy Storchaka662cef62016-10-23 12:11:19 +0300189 self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1)
190 self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1)
191 self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1)
192 self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1)
193 self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1)
194 self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1)
195 self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1)
196 self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1)
197 self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1)
198 self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1)
199 self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1)
200 self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1)
201 self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1)
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000202
203 # in python2.3 (etc), these loop endlessly in sre_parser.py
204 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
205 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
206 'xz8')
207 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
208 'xza')
209
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000210 def test_qualified_re_sub(self):
211 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300212 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Victor Stinner55e614a2014-10-29 16:58:59 +0100213 self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000214
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000215 def test_bug_114660(self):
216 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
217 'hello there')
218
219 def test_bug_462270(self):
220 # Test for empty sub() behaviour, see SF bug #462270
221 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
222 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
223
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200224 def test_symbolic_groups(self):
R David Murray44b548d2016-09-08 13:59:53 -0400225 re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
226 re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
227 re.compile(r'(?P<a1>x)\1(?(1)y)')
228 self.checkPatternError(r'(?P<a>)(?P<a>)',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200229 "redefinition of group name 'a' as group 2; "
230 "was group 1")
R David Murray44b548d2016-09-08 13:59:53 -0400231 self.checkPatternError(r'(?P<a>(?P=a))',
Serhiy Storchaka485407c2015-07-18 23:27:00 +0300232 "cannot refer to an open group", 10)
R David Murray44b548d2016-09-08 13:59:53 -0400233 self.checkPatternError(r'(?Pxy)', 'unknown extension ?Px')
234 self.checkPatternError(r'(?P<a>)(?P=a', 'missing ), unterminated name', 11)
235 self.checkPatternError(r'(?P=', 'missing group name', 4)
236 self.checkPatternError(r'(?P=)', 'missing group name', 4)
237 self.checkPatternError(r'(?P=1)', "bad character in group name '1'", 4)
238 self.checkPatternError(r'(?P=a)', "unknown group name 'a'")
239 self.checkPatternError(r'(?P=a1)', "unknown group name 'a1'")
240 self.checkPatternError(r'(?P=a.)', "bad character in group name 'a.'", 4)
241 self.checkPatternError(r'(?P<)', 'missing >, unterminated name', 4)
242 self.checkPatternError(r'(?P<a', 'missing >, unterminated name', 4)
243 self.checkPatternError(r'(?P<', 'missing group name', 4)
244 self.checkPatternError(r'(?P<>)', 'missing group name', 4)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200245 self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4)
246 self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4)
247 self.checkPatternError(r'(?(', 'missing group name', 3)
248 self.checkPatternError(r'(?())', 'missing group name', 3)
249 self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3)
250 self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
251 self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
252 self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
Georg Brandl1d472b72013-04-14 11:40:00 +0200253 # New valid/invalid identifiers in Python 3
254 re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
255 re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200256 self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300257 # Support > 100 groups.
258 pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
259 pat = '(?:%s)(?(200)z|t)' % pat
260 self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200261
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000262 def test_symbolic_refs(self):
R David Murray44b548d2016-09-08 13:59:53 -0400263 self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200264 'missing >, unterminated name', 3)
R David Murray44b548d2016-09-08 13:59:53 -0400265 self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200266 'missing group name', 3)
R David Murray44b548d2016-09-08 13:59:53 -0400267 self.checkTemplateError('(?P<a>x)', r'\g', 'xx', 'missing <', 2)
268 self.checkTemplateError('(?P<a>x)', r'\g<a a>', 'xx',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200269 "bad character in group name 'a a'", 3)
R David Murray44b548d2016-09-08 13:59:53 -0400270 self.checkTemplateError('(?P<a>x)', r'\g<>', 'xx',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200271 'missing group name', 3)
R David Murray44b548d2016-09-08 13:59:53 -0400272 self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200273 "bad character in group name '1a1'", 3)
274 self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
Serhiy Storchaka662cef62016-10-23 12:11:19 +0300275 'invalid group reference 2', 3)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200276 self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
Serhiy Storchaka662cef62016-10-23 12:11:19 +0300277 'invalid group reference 2', 1)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200278 with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
R David Murray44b548d2016-09-08 13:59:53 -0400279 re.sub('(?P<a>x)', r'\g<ab>', 'xx')
Serhiy Storchaka7438e4b2014-10-10 11:06:31 +0300280 self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
281 self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
R David Murray44b548d2016-09-08 13:59:53 -0400282 self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200283 "bad character in group name '-1'", 3)
Georg Brandl1d472b72013-04-14 11:40:00 +0200284 # New valid/invalid identifiers in Python 3
285 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
286 self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
R David Murray44b548d2016-09-08 13:59:53 -0400287 self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200288 "bad character in group name '©'", 3)
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300289 # Support > 100 groups.
290 pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
R David Murray44b548d2016-09-08 13:59:53 -0400291 self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000292
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000293 def test_re_subn(self):
294 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
295 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
296 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
297 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300298 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Victor Stinner55e614a2014-10-29 16:58:59 +0100299 self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000300
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000301 def test_re_split(self):
Serhiy Storchaka25324972013-10-16 12:46:28 +0300302 for string in ":a:b::c", S(":a:b::c"):
303 self.assertTypedEqual(re.split(":", string),
304 ['', 'a', 'b', '', 'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200305 self.assertTypedEqual(re.split(":+", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300306 ['', 'a', 'b', 'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200307 self.assertTypedEqual(re.split("(:+)", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300308 ['', ':', 'a', ':', 'b', '::', 'c'])
309 for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
310 memoryview(b":a:b::c")):
311 self.assertTypedEqual(re.split(b":", string),
312 [b'', b'a', b'b', b'', b'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200313 self.assertTypedEqual(re.split(b":+", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300314 [b'', b'a', b'b', b'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200315 self.assertTypedEqual(re.split(b"(:+)", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300316 [b'', b':', b'a', b':', b'b', b'::', b'c'])
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300317 for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
318 "\U0001d49c\U0001d49e\U0001d4b5"):
319 string = ":%s:%s::%s" % (a, b, c)
320 self.assertEqual(re.split(":", string), ['', a, b, '', c])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200321 self.assertEqual(re.split(":+", string), ['', a, b, c])
322 self.assertEqual(re.split("(:+)", string),
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300323 ['', ':', a, ':', b, '::', c])
Serhiy Storchaka25324972013-10-16 12:46:28 +0300324
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200325 self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
326 self.assertEqual(re.split("(:)+", ":a:b::c"),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000327 ['', ':', 'a', ':', 'b', ':', 'c'])
328 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
329 ['', ':', 'a', ':b::', 'c'])
330 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
331 ['', None, ':', 'a', None, ':', '', 'b', None, '',
332 None, '::', 'c'])
333 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
334 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000335
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200336 for sep, expected in [
337 (':*', ['', 'a', 'b', 'c']),
338 ('(?::*)', ['', 'a', 'b', 'c']),
339 ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
340 ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
341 ]:
342 with self.subTest(sep=sep), self.assertWarns(FutureWarning):
343 self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
344
345 for sep, expected in [
346 ('', [':a:b::c']),
347 (r'\b', [':a:b::c']),
348 (r'(?=:)', [':a:b::c']),
349 (r'(?<=:)', [':a:b::c']),
350 ]:
351 with self.subTest(sep=sep), self.assertRaises(ValueError):
352 self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
353
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000354 def test_qualified_re_split(self):
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300355 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
Victor Stinner55e614a2014-10-29 16:58:59 +0100356 self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
357 self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
358 self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000359 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200360 self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000361 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200362 with self.assertWarns(FutureWarning):
363 self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
364 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000365
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000366 def test_re_findall(self):
367 self.assertEqual(re.findall(":+", "abc"), [])
Serhiy Storchaka25324972013-10-16 12:46:28 +0300368 for string in "a:b::c:::d", S("a:b::c:::d"):
369 self.assertTypedEqual(re.findall(":+", string),
370 [":", "::", ":::"])
371 self.assertTypedEqual(re.findall("(:+)", string),
372 [":", "::", ":::"])
373 self.assertTypedEqual(re.findall("(:)(:*)", string),
374 [(":", ""), (":", ":"), (":", "::")])
375 for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
376 memoryview(b"a:b::c:::d")):
377 self.assertTypedEqual(re.findall(b":+", string),
378 [b":", b"::", b":::"])
379 self.assertTypedEqual(re.findall(b"(:+)", string),
380 [b":", b"::", b":::"])
381 self.assertTypedEqual(re.findall(b"(:)(:*)", string),
382 [(b":", b""), (b":", b":"), (b":", b"::")])
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300383 for x in ("\xe0", "\u0430", "\U0001d49c"):
384 xx = x * 2
385 xxx = x * 3
386 string = "a%sb%sc%sd" % (x, xx, xxx)
387 self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
388 self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
389 self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
390 [(x, ""), (x, x), (x, xx)])
Guido van Rossum49946571997-07-18 04:26:25 +0000391
Skip Montanaro5ba00542003-04-25 16:00:14 +0000392 def test_bug_117612(self):
393 self.assertEqual(re.findall(r"(a|(b))", "aba"),
394 [("a", ""),("b", "b"),("a", "")])
395
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000396 def test_re_match(self):
Serhiy Storchaka25324972013-10-16 12:46:28 +0300397 for string in 'a', S('a'):
398 self.assertEqual(re.match('a', string).groups(), ())
399 self.assertEqual(re.match('(a)', string).groups(), ('a',))
400 self.assertEqual(re.match('(a)', string).group(0), 'a')
401 self.assertEqual(re.match('(a)', string).group(1), 'a')
402 self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
403 for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
404 self.assertEqual(re.match(b'a', string).groups(), ())
405 self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
406 self.assertEqual(re.match(b'(a)', string).group(0), b'a')
407 self.assertEqual(re.match(b'(a)', string).group(1), b'a')
408 self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300409 for a in ("\xe0", "\u0430", "\U0001d49c"):
410 self.assertEqual(re.match(a, a).groups(), ())
411 self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
412 self.assertEqual(re.match('(%s)' % a, a).group(0), a)
413 self.assertEqual(re.match('(%s)' % a, a).group(1), a)
414 self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
Guido van Rossum49946571997-07-18 04:26:25 +0000415
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000416 pat = re.compile('((a)|(b))(c)?')
417 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
418 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
419 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
420 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
421 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000422
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000423 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
424 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
425 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
426 (None, 'b', None))
427 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000428
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +0300429 def test_group(self):
430 class Index:
431 def __init__(self, value):
432 self.value = value
433 def __index__(self):
434 return self.value
435 # A single group
436 m = re.match('(a)(b)', 'ab')
437 self.assertEqual(m.group(), 'ab')
438 self.assertEqual(m.group(0), 'ab')
439 self.assertEqual(m.group(1), 'a')
440 self.assertEqual(m.group(Index(1)), 'a')
441 self.assertRaises(IndexError, m.group, -1)
442 self.assertRaises(IndexError, m.group, 3)
443 self.assertRaises(IndexError, m.group, 1<<1000)
444 self.assertRaises(IndexError, m.group, Index(1<<1000))
445 self.assertRaises(IndexError, m.group, 'x')
446 # Multiple groups
447 self.assertEqual(m.group(2, 1), ('b', 'a'))
448 self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a'))
449
Eric V. Smith605bdae2016-09-11 08:55:43 -0400450 def test_match_getitem(self):
451 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
452
453 m = pat.match('a')
454 self.assertEqual(m['a1'], 'a')
455 self.assertEqual(m['b2'], None)
456 self.assertEqual(m['c3'], None)
457 self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None')
458 self.assertEqual(m[0], 'a')
459 self.assertEqual(m[1], 'a')
460 self.assertEqual(m[2], None)
461 self.assertEqual(m[3], None)
462 with self.assertRaisesRegex(IndexError, 'no such group'):
463 m['X']
464 with self.assertRaisesRegex(IndexError, 'no such group'):
465 m[-1]
466 with self.assertRaisesRegex(IndexError, 'no such group'):
467 m[4]
468 with self.assertRaisesRegex(IndexError, 'no such group'):
469 m[0, 1]
470 with self.assertRaisesRegex(IndexError, 'no such group'):
471 m[(0,)]
472 with self.assertRaisesRegex(IndexError, 'no such group'):
473 m[(0, 1)]
474 with self.assertRaisesRegex(KeyError, 'a2'):
475 'a1={a2}'.format_map(m)
476
477 m = pat.match('ac')
478 self.assertEqual(m['a1'], 'a')
479 self.assertEqual(m['b2'], None)
480 self.assertEqual(m['c3'], 'c')
481 self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c')
482 self.assertEqual(m[0], 'ac')
483 self.assertEqual(m[1], 'a')
484 self.assertEqual(m[2], None)
485 self.assertEqual(m[3], 'c')
486
487 # Cannot assign.
488 with self.assertRaises(TypeError):
489 m[0] = 1
490
491 # No len().
492 self.assertRaises(TypeError, len, m)
493
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200494 def test_re_fullmatch(self):
495 # Issue 16203: Proposal: add re.fullmatch() method.
496 self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
497 for string in "ab", S("ab"):
498 self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2))
499 for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
500 self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2))
501 for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
502 r = r"%s|%s" % (a, a + b)
503 self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
504 self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
505 self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
506 self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
507 self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
508 self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
509 self.assertIsNone(re.fullmatch(r"a+", "ab"))
510 self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
511 self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
512 self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
513 self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
514 self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
515 self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))
516
517 self.assertEqual(
518 re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
519 self.assertEqual(
520 re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
521 self.assertEqual(
522 re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
523
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000524 def test_re_groupref_exists(self):
R David Murray44b548d2016-09-08 13:59:53 -0400525 self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000526 ('(', 'a'))
R David Murray44b548d2016-09-08 13:59:53 -0400527 self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(),
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000528 (None, 'a'))
R David Murray44b548d2016-09-08 13:59:53 -0400529 self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'))
530 self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000531 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
532 ('a', 'b'))
R David Murray44b548d2016-09-08 13:59:53 -0400533 self.assertEqual(re.match(r'^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000534 (None, 'd'))
R David Murray44b548d2016-09-08 13:59:53 -0400535 self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000536 (None, 'd'))
R David Murray44b548d2016-09-08 13:59:53 -0400537 self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'a').groups(),
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000538 ('a', ''))
539
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000540 # Tests for bug #1177831: exercise groups other than the first group
541 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
542 self.assertEqual(p.match('abc').groups(),
543 ('a', 'b', 'c'))
544 self.assertEqual(p.match('ad').groups(),
545 ('a', None, 'd'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300546 self.assertIsNone(p.match('abd'))
547 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000548
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300549 # Support > 100 groups.
550 pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
551 pat = '(?:%s)(?(200)z)' % pat
552 self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000553
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200554 self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
555 self.checkPatternError(r'()(?(1)a|b',
556 'missing ), unterminated subpattern', 2)
557 self.checkPatternError(r'()(?(1)a|b|c)',
558 'conditional backref with more than '
559 'two branches', 10)
560
def test_re_groupref_overflow(self):
    """A reference to group number MAXGROUPS is reported as invalid.

    Checked both for a replacement template and for a conditional
    group reference inside a pattern.
    """
    from sre_constants import MAXGROUPS
    expected_message = 'invalid group reference %d' % MAXGROUPS
    # Replacement template that names the out-of-range group.
    template = r'\g<%s>' % MAXGROUPS
    self.checkTemplateError('()', template, 'xx', expected_message, 3)
    # Conditional pattern that tests the same out-of-range group.
    pattern = r'(?P<a>)(?(%d))' % MAXGROUPS
    self.checkPatternError(pattern, expected_message, 10)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200567
def test_re_groupref(self):
    """Exercise numeric backreferences, including ones to unmatched groups."""
    # (pattern, subject, expected groups); None means "must not match".
    scenarios = [
        (r'^(\|)?([^()]+)\1$', '|a|', ('|', 'a')),
        (r'^(\|)?([^()]+)\1?$', 'a', (None, 'a')),
        (r'^(\|)?([^()]+)\1$', 'a|', None),
        (r'^(\|)?([^()]+)\1$', '|a', None),
        (r'^(?:(a)|c)(\1)$', 'aa', ('a', 'a')),
        (r'^(?:(a)|c)(\1)?$', 'c', (None, None)),
    ]
    for pattern, subject, groups in scenarios:
        found = re.match(pattern, subject)
        if groups is None:
            self.assertIsNone(found)
        else:
            self.assertEqual(found.groups(), groups)

    # A backreference placed inside the group it names must not compile.
    self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)
581
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000582 def test_groupdict(self):
583 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
584 'first second').groupdict(),
585 {'first':'first', 'second':'second'})
586
587 def test_expand(self):
588 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
589 "first second")
590 .expand(r"\2 \1 \g<second> \g<first>"),
591 "second first second first")
Serhiy Storchaka7438e4b2014-10-10 11:06:31 +0300592 self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
593 "first")
594 .expand(r"\2 \g<second>"),
595 " ")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000596
def test_repeat_minmax(self):
    """Check bounded repeats ({m} and {m,n}) in greedy and lazy form.

    Covers a repeated capturing group and a repeated literal, the
    literal treatment of an empty "{}", and the compile error raised
    when the minimum exceeds the maximum.
    """
    # Repeat limits too small to span all of "abc" must not match.
    for pattern in (r"^(\w){1}$", r"^(\w){1}?$",
                    r"^(\w){1,2}$", r"^(\w){1,2}?$"):
        with self.subTest(pattern=pattern):
            self.assertIsNone(re.match(pattern, "abc"))

    # When the limits allow three repeats, the group ends up holding the
    # last character.  Both the greedy {3,4} and the lazy {3,4}? forms
    # are listed so that each one is exercised exactly once.
    for pattern in (r"^(\w){3}$", r"^(\w){1,3}$",
                    r"^(\w){1,4}$", r"^(\w){3,4}$",
                    r"^(\w){3}?$", r"^(\w){1,3}?$",
                    r"^(\w){1,4}?$", r"^(\w){3,4}?$"):
        with self.subTest(pattern=pattern):
            self.assertEqual(re.match(pattern, "abc").group(1), "c")

    # Same checks for a repeated literal instead of a group.
    for pattern in (r"^x{1}$", r"^x{1}?$", r"^x{1,2}$", r"^x{1,2}?$"):
        with self.subTest(pattern=pattern):
            self.assertIsNone(re.match(pattern, "xxx"))

    for pattern in (r"^x{3}$", r"^x{1,3}$", r"^x{3,3}$",
                    r"^x{1,4}$", r"^x{3,4}$",
                    r"^x{3}?$", r"^x{1,3}?$",
                    r"^x{1,4}?$", r"^x{3,4}?$"):
        with self.subTest(pattern=pattern):
            self.assertTrue(re.match(pattern, "xxx"))

    # "{}" with no numbers is not a repeat: it only matches itself.
    self.assertIsNone(re.match(r"^x{}$", "xxx"))
    self.assertTrue(re.match(r"^x{}$", "x{}"))

    self.checkPatternError(r'x{2,1}',
                           'min repeat greater than max repeat', 2)
632
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000633 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000634 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000635 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000636 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
637 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
638 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
639 {'first': 1, 'other': 2})
640
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000641 self.assertEqual(re.match("(a)", "a").pos, 0)
642 self.assertEqual(re.match("(a)", "a").endpos, 1)
643 self.assertEqual(re.match("(a)", "a").string, "a")
644 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300645 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000646
Serhiy Storchaka07360df2015-03-30 01:01:48 +0300647 # Issue 14260. groupindex should be non-modifiable mapping.
648 p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
649 self.assertEqual(sorted(p.groupindex), ['first', 'other'])
650 self.assertEqual(p.groupindex['other'], 2)
651 with self.assertRaises(TypeError):
652 p.groupindex['other'] = 0
653 self.assertEqual(p.groupindex['other'], 2)
654
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000655 def test_special_escapes(self):
656 self.assertEqual(re.search(r"\b(b.)\b",
657 "abcd abc bcd bx").group(1), "bx")
658 self.assertEqual(re.search(r"\B(b.)\B",
659 "abc bcd bc abxd").group(1), "bx")
660 self.assertEqual(re.search(r"\b(b.)\b",
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300661 "abcd abc bcd bx", re.ASCII).group(1), "bx")
662 self.assertEqual(re.search(r"\B(b.)\B",
663 "abc bcd bc abxd", re.ASCII).group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000664 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
665 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300666 self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300667 self.assertEqual(re.search(br"\b(b.)\b",
668 b"abcd abc bcd bx").group(1), b"bx")
669 self.assertEqual(re.search(br"\B(b.)\B",
670 b"abc bcd bc abxd").group(1), b"bx")
671 self.assertEqual(re.search(br"\b(b.)\b",
672 b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
673 self.assertEqual(re.search(br"\B(b.)\B",
674 b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
675 self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
676 self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300677 self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000678 self.assertEqual(re.search(r"\d\D\w\W\s\S",
679 "1aa! a").group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300680 self.assertEqual(re.search(br"\d\D\w\W\s\S",
681 b"1aa! a").group(0), b"1aa! a")
682 self.assertEqual(re.search(r"\d\D\w\W\s\S",
683 "1aa! a", re.ASCII).group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300684 self.assertEqual(re.search(br"\d\D\w\W\s\S",
685 b"1aa! a", re.LOCALE).group(0), b"1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000686
def test_other_escapes(self):
    """Check escaped punctuation and the rejection of unknown letter escapes."""
    self.checkPatternError("\\", 'bad escape (end of pattern)', 0)

    # (pattern, subject, expected match text); None means "must not match".
    literal_cases = [
        (r"\(", '(', '('),
        (r"\(", ')', None),
        (r"\\", '\\', '\\'),
        (r"[\]]", ']', ']'),
        (r"[\]]", '[', None),
        (r"[a\-c]", '-', '-'),
        (r"[a\-c]", 'b', None),
        (r"[\^a]+", 'a^', 'a^'),
        (r"[\^a]+", 'b', None),
    ]
    for pattern, subject, expected in literal_cases:
        found = re.match(pattern, subject)
        if expected is None:
            self.assertIsNone(found)
        else:
            self.assertEqual(found.group(), expected)

    re.purge()  # for warnings
    # Letters with no escape meaning must be rejected, both at the top
    # level of a pattern and inside a character set.
    rejected = [
        ('\\%c', 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY'),
        ('[\\%c]', 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ'),
    ]
    for template, letters in rejected:
        for c in letters:
            with self.subTest(c):
                with self.assertRaises(re.error):
                    re.compile(template % c)
Serhiy Storchakab99c1322014-11-10 14:38:16 +0200705
Ezio Melotti5a045b92012-02-29 11:48:44 +0200706 def test_string_boundaries(self):
707 # See http://bugs.python.org/issue10713
708 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
709 "abc")
710 # There's a word boundary at the start of a string.
711 self.assertTrue(re.match(r"\b", "abc"))
712 # A non-empty string includes a non-boundary zero-length match.
713 self.assertTrue(re.search(r"\B", "abc"))
714 # There is no non-boundary match at the start of a string.
715 self.assertFalse(re.match(r"\B", "abc"))
716 # However, an empty string contains no word boundaries, and also no
717 # non-boundaries.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300718 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200719 # This one is questionable and different from the perlre behaviour,
720 # but describes current behavior.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300721 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200722 # A single word-character string has two boundaries, but no
723 # non-boundary gaps.
724 self.assertEqual(len(re.findall(r"\b", "a")), 2)
725 self.assertEqual(len(re.findall(r"\B", "a")), 0)
726 # If there are no words, there are no boundaries
727 self.assertEqual(len(re.findall(r"\b", " ")), 0)
728 self.assertEqual(len(re.findall(r"\b", " ")), 0)
729 # Can match around the whitespace.
730 self.assertEqual(len(re.findall(r"\B", " ")), 2)
731
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000732 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000733 self.assertEqual(re.match("([\u2222\u2223])",
734 "\u2222").group(1), "\u2222")
Serhiy Storchakabe80fc92013-10-24 22:02:58 +0300735 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300736 self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000737
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100738 def test_big_codesize(self):
739 # Issue #1160
740 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300741 self.assertTrue(r.match('1000'))
742 self.assertTrue(r.match('9999'))
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100743
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000744 def test_anyall(self):
745 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
746 "a\nb")
747 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
748 "a\n\nb")
749
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200750 def test_lookahead(self):
R David Murray44b548d2016-09-08 13:59:53 -0400751 self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a")
752 self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a")
753 self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a")
754 self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000755 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
756 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
757 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
758
759 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
760 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
761 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
762 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
763
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200764 # Group reference.
765 self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
766 self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
767 # Conditional group reference.
768 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
769 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
770 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
771 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
772 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
773 # Group used before defined.
774 self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
775 self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
776 self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
777
778 def test_lookbehind(self):
779 self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
780 self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
781 self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
782 self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
783 # Group reference.
784 self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
785 self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
786 self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
787 self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
788 # Conditional group reference.
789 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
790 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
791 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
792 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
793 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
794 # Group used before defined.
795 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
796 self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
797 self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
798 # Group defined in the same lookbehind pattern
799 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
800 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
801 self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
802 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
803
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000804 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000805 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300806 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000807 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
808 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
809 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
810 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
811 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
812 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
813 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
814 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
815
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200816 assert '\u212a'.lower() == 'k' # 'K'
817 self.assertTrue(re.match(r'K', '\u212a', re.I))
818 self.assertTrue(re.match(r'k', '\u212a', re.I))
819 self.assertTrue(re.match(r'\u212a', 'K', re.I))
820 self.assertTrue(re.match(r'\u212a', 'k', re.I))
821 assert '\u017f'.upper() == 'S' # 'ſ'
822 self.assertTrue(re.match(r'S', '\u017f', re.I))
823 self.assertTrue(re.match(r's', '\u017f', re.I))
824 self.assertTrue(re.match(r'\u017f', 'S', re.I))
825 self.assertTrue(re.match(r'\u017f', 's', re.I))
826 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
827 self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
828 self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
829
830 def test_ignore_case_set(self):
831 self.assertTrue(re.match(r'[19A]', 'A', re.I))
832 self.assertTrue(re.match(r'[19a]', 'a', re.I))
833 self.assertTrue(re.match(r'[19a]', 'A', re.I))
834 self.assertTrue(re.match(r'[19A]', 'a', re.I))
835 self.assertTrue(re.match(br'[19A]', b'A', re.I))
836 self.assertTrue(re.match(br'[19a]', b'a', re.I))
837 self.assertTrue(re.match(br'[19a]', b'A', re.I))
838 self.assertTrue(re.match(br'[19A]', b'a', re.I))
839 assert '\u212a'.lower() == 'k' # 'K'
840 self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
841 self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
842 self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
843 self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
844 assert '\u017f'.upper() == 'S' # 'ſ'
845 self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
846 self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
847 self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
848 self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
849 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
850 self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
851 self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
852
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200853 def test_ignore_case_range(self):
854 # Issues #3511, #17381.
855 self.assertTrue(re.match(r'[9-a]', '_', re.I))
856 self.assertIsNone(re.match(r'[9-A]', '_', re.I))
857 self.assertTrue(re.match(br'[9-a]', b'_', re.I))
858 self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
859 self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
860 self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
861 self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))
862 self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
863 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
864 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
865 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
866 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
867 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
868 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
869 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
870 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
871
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200872 assert '\u212a'.lower() == 'k' # 'K'
873 self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
874 self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
875 self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
876 self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
877 assert '\u017f'.upper() == 'S' # 'ſ'
878 self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
879 self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
880 self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
881 self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
882 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
883 self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
884 self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
885
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000886 def test_category(self):
887 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
888
889 def test_getlower(self):
890 import _sre
891 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
892 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
893 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200894 self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000895
896 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300897 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200898 self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
899 self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000900
901 def test_not_literal(self):
R David Murray44b548d2016-09-08 13:59:53 -0400902 self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
903 self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000904
905 def test_search_coverage(self):
R David Murray44b548d2016-09-08 13:59:53 -0400906 self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
907 self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000908
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    """Assert that *pattern* matches *text* with the given text and span.

    When both *match* and *span* are omitted, the pattern is expected to
    match the whole of *text*.  They must be given together; supplying
    only one of them raises ValueError.  *matcher* is the function used
    to perform the match and is called as matcher(pattern, text).
    """
    omitted = (match is None, span is None)
    if all(omitted):
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    elif any(omitted):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000922
def test_re_escape(self):
    """Check re.escape() on each of the first 256 code points.

    ASCII letters, digits and underscore are expected unchanged, NUL is
    expected as a three-digit octal escape, and every other character is
    expected to gain a leading backslash.
    """
    unescaped = string.ascii_letters + string.digits + '_'
    every_char = ''.join(map(chr, range(256)))
    for char in every_char:
        if char in unescaped:
            expected = char
        elif char == '\x00':
            expected = '\\000'
        else:
            expected = '\\' + char
        self.assertEqual(re.escape(char), expected)
        # The escaped form must match the character it was built from.
        self.assertMatch(re.escape(char), char)
    self.assertMatch(re.escape(every_char), every_char)
Guido van Rossum49946571997-07-18 04:26:25 +0000935
def test_re_escape_byte(self):
    """Check re.escape() on each of the 256 single-byte values.

    ASCII letters, digits and underscore are expected unchanged, NUL is
    expected as a three-digit octal escape, and every other byte is
    expected to gain a leading backslash.
    """
    unescaped = (string.ascii_letters + string.digits + '_').encode('ascii')
    every_byte = bytes(range(256))
    for value in every_byte:
        single = bytes([value])
        if single in unescaped:
            expected = single
        elif value == 0:
            expected = b'\\000'
        else:
            expected = b'\\' + single
        self.assertEqual(re.escape(single), expected)
        # The escaped form must match the byte it was built from.
        self.assertMatch(re.escape(single), single)
    self.assertMatch(re.escape(every_byte), every_byte)
Guido van Rossum698280d2008-09-10 17:44:35 +0000949
def test_re_escape_non_ascii(self):
    """re.escape() backslash-escapes non-ASCII characters in str input."""
    subject = 'xxx\u2620\u2620\u2620xxx'
    escaped_subject = re.escape(subject)
    self.assertEqual(escaped_subject, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(escaped_subject, subject)
    # An escaped character can be embedded in a larger pattern.
    embedded = '.%s+.' % re.escape('\u2620')
    self.assertMatch(embedded, subject,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)
957
def test_re_escape_non_ascii_bytes(self):
    """re.escape() backslash-escapes each non-ASCII byte of UTF-8 input."""
    encoded = 'y\u2620y\u2620y'.encode('utf-8')
    escaped = re.escape(encoded)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, encoded)
    # The escaped encoded character is found once per occurrence.
    needle = re.escape('\u2620'.encode('utf-8'))
    occurrences = re.findall(needle, encoded)
    self.assertEqual(len(occurrences), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000965
Serhiy Storchakab85a9762014-09-15 11:33:19 +0300966 def test_pickling(self):
967 import pickle
968 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
969 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
970 pickled = pickle.dumps(oldpat, proto)
971 newpat = pickle.loads(pickled)
972 self.assertEqual(newpat, oldpat)
973 # current pickle expects the _compile() reconstructor in re module
974 from re import _compile
Guido van Rossum23b22571997-07-17 22:36:14 +0000975
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000976 def test_constants(self):
977 self.assertEqual(re.I, re.IGNORECASE)
978 self.assertEqual(re.L, re.LOCALE)
979 self.assertEqual(re.M, re.MULTILINE)
980 self.assertEqual(re.S, re.DOTALL)
981 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000982
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000983 def test_flags(self):
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200984 for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300985 self.assertTrue(re.compile('^pattern$', flag))
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200986 for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
987 self.assertTrue(re.compile(b'^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +0000988
def test_sre_character_literals(self):
    """Check octal, hex and Unicode escapes at the top level of str patterns."""
    # (escape template, literal character that follows the escape).
    octal_and_hex = [(r"\%03o", ""), (r"\%03o0", "0"), (r"\%03o8", "8"),
                     (r"\x%02x", ""), (r"\x%02x0", "0"), (r"\x%02xz", "z")]
    short_unicode = [(r"\u%04x", ""), (r"\u%04x0", "0"), (r"\u%04xz", "z")]
    long_unicode = [(r"\U%08x", ""), (r"\U%08x0", "0"), (r"\U%08xz", "z")]

    for code in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        # Pick the escape forms able to express this code point.
        applicable = []
        if code < 256:
            applicable.extend(octal_and_hex)
        if code < 0x10000:
            applicable.extend(short_unicode)
        applicable.extend(long_unicode)
        for template, trailer in applicable:
            self.assertTrue(re.match(template % code, chr(code) + trailer))

    # Short octal escapes, optionally followed by a non-octal digit.
    for pattern, subject in [(r"\0", "\000"), (r"\08", "\0008"),
                             (r"\01", "\001"), (r"\018", "\0018")]:
        self.assertTrue(re.match(pattern, subject))

    # (pattern, expected error message, expected error position).
    rejected = [
        (r"\567", r'octal escape value \567 outside of '
                  r'range 0-0o377', 0),
        (r"\911", 'invalid group reference 91', 1),
        (r"\x1", r'incomplete escape \x1', 0),
        (r"\x1z", r'incomplete escape \x1', 0),
        (r"\u123", r'incomplete escape \u123', 0),
        (r"\u123z", r'incomplete escape \u123', 0),
        (r"\U0001234", r'incomplete escape \U0001234', 0),
        (r"\U0001234z", r'incomplete escape \U0001234', 0),
        (r"\U00110000", r'bad escape \U00110000', 0),
    ]
    for pattern, message, position in rejected:
        self.checkPatternError(pattern, message, position)
Skip Montanaro7d9963f2003-04-25 14:12:40 +00001020
def test_sre_character_class_literals(self):
    """Check octal, hex and Unicode escapes inside str character sets."""
    octal_and_hex = (r"[\%o]", r"[\%o8]", r"[\%03o]", r"[\%03o0]",
                     r"[\%03o8]", r"[\x%02x]", r"[\x%02x0]", r"[\x%02xz]")
    short_unicode = (r"[\u%04x]", r"[\u%04x0]", r"[\u%04xz]")

    for code in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        char = chr(code)
        # Pick the short escape forms able to express this code point.
        templates = ()
        if code < 256:
            templates += octal_and_hex
        if code < 0x10000:
            templates += short_unicode
        for template in templates:
            self.assertTrue(re.match(template % code, char))
        # The eight-digit form applies to every code point.
        self.assertTrue(re.match(r"[\U%08x]" % code, char))
        self.assertTrue(re.match(r"[\U%08x0]" % code, char + "0"))
        self.assertTrue(re.match(r"[\U%08xz]" % code, char + "z"))

    # (pattern, expected error message, expected error position).
    rejected = [
        (r"[\567]", r'octal escape value \567 outside of '
                    r'range 0-0o377', 1),
        (r"[\911]", r'bad escape \9', 1),
        (r"[\x1z]", r'incomplete escape \x1', 1),
        (r"[\u123z]", r'incomplete escape \u123', 1),
        (r"[\U0001234z]", r'incomplete escape \U0001234', 1),
        (r"[\U00110000]", r'bad escape \U00110000', 1),
    ]
    for pattern, message, position in rejected:
        self.checkPatternError(pattern, message, position)

    # A range whose endpoints are both eight-digit escapes.
    self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +02001048
def test_sre_byte_literals(self):
    """Check octal and hex escapes at the top level of bytes patterns."""
    # (escape template, literal byte string that follows the escape).
    escape_forms = [(r"\%03o", b""), (r"\%03o0", b"0"), (r"\%03o8", b"8"),
                    (r"\x%02x", b""), (r"\x%02x0", b"0"), (r"\x%02xz", b"z")]
    for value in [0, 8, 16, 32, 64, 127, 128, 255]:
        for template, trailer in escape_forms:
            pattern = (template % value).encode()
            self.assertTrue(re.match(pattern, bytes([value]) + trailer))

    # Unicode escapes are not accepted in bytes patterns.
    for pattern in (br"\u1234", br"\U00012345"):
        with self.assertRaises(re.error):
            re.compile(pattern)

    # Short octal escapes, optionally followed by a non-octal digit.
    for pattern, subject in [(br"\0", b"\000"), (br"\08", b"\0008"),
                             (br"\01", b"\001"), (br"\018", b"\0018")]:
        self.assertTrue(re.match(pattern, subject))

    # (pattern, expected error message, expected error position).
    rejected = [
        (br"\567", r'octal escape value \567 outside of '
                   r'range 0-0o377', 0),
        (br"\911", 'invalid group reference 91', 1),
        (br"\x1", r'incomplete escape \x1', 0),
        (br"\x1z", r'incomplete escape \x1', 0),
    ]
    for pattern, message, position in rejected:
        self.checkPatternError(pattern, message, position)
Antoine Pitrou463badf2012-06-23 13:29:19 +02001069
def test_sre_byte_class_literals(self):
    """Check octal and hex escapes inside bytes character sets."""
    templates = (r"[\%o]", r"[\%o8]", r"[\%03o]", r"[\%03o0]",
                 r"[\%03o8]", r"[\x%02x]", r"[\x%02x0]", r"[\x%02xz]")
    for value in [0, 8, 16, 32, 64, 127, 128, 255]:
        subject = bytes([value])
        for template in templates:
            pattern = (template % value).encode()
            self.assertTrue(re.match(pattern, subject))

    # Unicode escapes are not accepted in bytes patterns.
    for pattern in (br"[\u1234]", br"[\U00012345]"):
        with self.assertRaises(re.error):
            re.compile(pattern)

    # (pattern, expected error message, expected error position).
    rejected = [
        (br"[\567]", r'octal escape value \567 outside of '
                     r'range 0-0o377', 1),
        (br"[\911]", r'bad escape \9', 1),
        (br"[\x1z]", r'incomplete escape \x1', 1),
    ]
    for pattern, message, position in rejected:
        self.checkPatternError(pattern, message, position)
1087
def test_character_set_errors(self):
    """Malformed character sets are reported with message and position."""
    unterminated = 'unterminated character set'
    # (pattern, expected error message, expected error position).
    rejected = [
        (r'[', unterminated, 0),
        (r'[^', unterminated, 0),
        (r'[a', unterminated, 0),
        # bug 545855 -- This pattern failed to cause a compile error as it
        # should, instead provoking a TypeError.
        (r"[a-", unterminated, 0),
        (r"[\w-b]", r'bad character range \w-b', 1),
        (r"[a-\w]", r'bad character range a-\w', 1),
        (r"[b-a]", 'bad character range b-a', 1),
    ]
    for pattern, message, position in rejected:
        self.checkPatternError(pattern, message, position)
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +00001098
Skip Montanaro7d9963f2003-04-25 14:12:40 +00001099 def test_bug_113254(self):
1100 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
1101 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
1102 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
1103
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001104 def test_bug_527371(self):
1105 # bug described in patches 527371/672491
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001106 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001107 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
1108 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
R David Murray44b548d2016-09-08 13:59:53 -04001109 self.assertEqual(re.match(r"(?P<a>a(b))", "ab").lastgroup, 'a')
1110 self.assertEqual(re.match(r"((a))", "a").lastindex, 1)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001111
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001112 def test_bug_418626(self):
1113 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
1114 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
1115 # pattern '*?' on a long string.
1116 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
1117 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
1118 20003)
1119 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001120 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +00001121 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001122 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001123
1124 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001125 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001126 self.assertEqual(re.compile(pat) and 1, 1)
1127
Skip Montanaro1e703c62003-04-25 15:40:28 +00001128 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001129 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +00001130 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001131 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
1132 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
1133 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +00001134
def test_nothing_to_repeat(self):
    # A quantifier, greedy or lazy, with nothing in front of it is an
    # error both at the start of the pattern and at the start of a group.
    quantifiers = [reps + mod
                   for reps in ('*', '+', '?', '{1,2}')
                   for mod in ('', '?')]
    for quantifier in quantifiers:
        self.checkPatternError(quantifier, 'nothing to repeat', 0)
        self.checkPatternError('(?:' + quantifier + ')',
                               'nothing to repeat', 3)
1142
def test_multiple_repeat(self):
    # Stacking one quantifier directly on another is an error; the
    # expected position is just past the first (inner) quantifier.
    suffixes = ('', '?')
    outer_ops = [reps + mod
                 for reps in ('*', '+', '{1,2}')
                 for mod in suffixes]
    inner_ops = [reps + mod
                 for reps in ('*', '+', '?', '{1,2}')
                 for mod in suffixes]
    for outer_op in outer_ops:
        for inner_op in inner_ops:
            self.checkPatternError('x' + inner_op + outer_op,
                                   'multiple repeat', 1 + len(inner_op))
1152
Serhiy Storchakafa468162013-02-16 21:23:53 +02001153 def test_unlimited_zero_width_repeat(self):
1154 # Issue #9669
1155 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
1156 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
1157 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
1158 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
1159 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
1160 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
1161
Skip Montanaro1e703c62003-04-25 15:40:28 +00001162 def test_scanner(self):
1163 def s_ident(scanner, token): return token
1164 def s_operator(scanner, token): return "op%s" % token
1165 def s_float(scanner, token): return float(token)
1166 def s_int(scanner, token): return int(token)
1167
1168 scanner = Scanner([
1169 (r"[a-zA-Z_]\w*", s_ident),
1170 (r"\d+\.\d*", s_float),
1171 (r"\d+", s_int),
1172 (r"=|\+|-|\*|/", s_operator),
1173 (r"\s+", None),
1174 ])
1175
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001176 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +00001177
Skip Montanaro1e703c62003-04-25 15:40:28 +00001178 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
1179 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
1180 'op+', 'bar'], ''))
1181
Skip Montanaro5ba00542003-04-25 16:00:14 +00001182 def test_bug_448951(self):
1183 # bug 448951 (similar to 429357, but with single char match)
1184 # (Also test greedy matches.)
1185 for op in '','?','*':
1186 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
1187 (None, None))
1188 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
1189 ('a:', 'a'))
1190
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001191 def test_bug_725106(self):
1192 # capturing groups in alternatives in repeats
1193 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
1194 ('b', 'a'))
1195 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
1196 ('c', 'b'))
1197 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
1198 ('b', None))
1199 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
1200 ('b', None))
1201 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
1202 ('b', 'a'))
1203 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
1204 ('c', 'b'))
1205 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
1206 ('b', None))
1207 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
1208 ('b', None))
1209
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +00001210 def test_bug_725149(self):
1211 # mark_stack_base restoring before restoring marks
1212 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
1213 ('a', None))
1214 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
1215 ('a', None, None))
1216
Just van Rossum12723ba2003-07-02 20:03:04 +00001217 def test_bug_764548(self):
1218 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001219 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +00001220 pat = re.compile(my_unicode("abc"))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001221 self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +00001222
Skip Montanaro5ba00542003-04-25 16:00:14 +00001223 def test_finditer(self):
1224 iter = re.finditer(r":+", "a:b::c:::d")
1225 self.assertEqual([item.group(0) for item in iter],
1226 [":", "::", ":::"])
1227
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001228 pat = re.compile(r":+")
1229 iter = pat.finditer("a:b::c:::d", 1, 10)
1230 self.assertEqual([item.group(0) for item in iter],
1231 [":", "::", ":::"])
1232
1233 pat = re.compile(r":+")
1234 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
1235 self.assertEqual([item.group(0) for item in iter],
1236 [":", "::", ":::"])
1237
1238 pat = re.compile(r":+")
1239 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
1240 self.assertEqual([item.group(0) for item in iter],
1241 [":", "::", ":::"])
1242
1243 pat = re.compile(r":+")
1244 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
1245 self.assertEqual([item.group(0) for item in iter],
1246 ["::", "::"])
1247
Thomas Wouters40a088d2008-03-18 20:19:54 +00001248 def test_bug_926075(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001249 self.assertIsNot(re.compile('bug_926075'),
1250 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +00001251
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001252 def test_bug_931848(self):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001253 pattern = "[\u002E\u3002\uFF0E\uFF61]"
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001254 self.assertEqual(re.compile(pattern).split("a.b.c"),
1255 ['a','b','c'])
1256
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001257 def test_bug_581080(self):
1258 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +00001259 self.assertEqual(next(iter).span(), (1,2))
1260 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001261
1262 scanner = re.compile(r"\s").scanner("a b")
1263 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001264 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001265
1266 def test_bug_817234(self):
1267 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +00001268 self.assertEqual(next(iter).span(), (0, 4))
1269 self.assertEqual(next(iter).span(), (4, 4))
1270 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001271
Mark Dickinson1f268282009-07-28 17:22:36 +00001272 def test_bug_6561(self):
1273 # '\d' should match characters in Unicode category 'Nd'
1274 # (Number, Decimal Digit), but not those in 'Nl' (Number,
1275 # Letter) or 'No' (Number, Other).
1276 decimal_digits = [
1277 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
1278 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
1279 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
1280 ]
1281 for x in decimal_digits:
R David Murray44b548d2016-09-08 13:59:53 -04001282 self.assertEqual(re.match(r'^\d$', x).group(0), x)
Mark Dickinson1f268282009-07-28 17:22:36 +00001283
1284 not_decimal_digits = [
1285 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
1286 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
1287 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
1288 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
1289 ]
1290 for x in not_decimal_digits:
R David Murray44b548d2016-09-08 13:59:53 -04001291 self.assertIsNone(re.match(r'^\d$', x))
Mark Dickinson1f268282009-07-28 17:22:36 +00001292
Guido van Rossumd8faa362007-04-27 19:54:29 +00001293 def test_empty_array(self):
1294 # SF buf 1647541
1295 import array
Guido van Rossum166746c2007-07-03 15:39:16 +00001296 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +00001297 a = array.array(typecode)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001298 self.assertIsNone(re.compile(b"bla").match(a))
Antoine Pitroufd036452008-08-19 17:56:33 +00001299 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001300
def test_inline_flags(self):
    # Bug #1700
    upper_char = '\u1ea0'  # Latin Capital Letter A with Dot Below
    lower_char = '\u1ea1'  # Latin Small Letter A with Dot Below

    # IGNORECASE and DOTALL must behave the same whether they are passed
    # as arguments, written inline, or split between the two.
    flag_placements = (
        ('.', re.I | re.S),
        ('(?i).', re.S),
        ('(?is).', 0),
        ('(?s)(?i).', 0),
    )
    case_pairs = ((upper_char, lower_char), (lower_char, upper_char))
    for prefix, flags in flag_placements:
        for in_pattern, in_subject in case_pairs:
            p = re.compile(prefix + in_pattern, flags)
            q = p.match('\n' + in_subject)
            self.assertTrue(q)

    # Combinations with the VERBOSE flag.
    verbose_cases = (
        ('(?ix) ' + upper_char, lower_char, 0),
        ('(?ix) ' + lower_char, upper_char, 0),
        (' (?i) ' + upper_char, lower_char, re.X),
        ('(?x) (?i) ' + upper_char, lower_char, 0),
        (' (?x) (?i) ' + upper_char, lower_char, re.X),
    )
    for pattern, subject, flags in verbose_cases:
        self.assertTrue(re.match(pattern, subject, flags))

    # Global inline flags that are not at the start of the expression
    # still take effect but emit a DeprecationWarning which quotes the
    # pattern (cut to 20 characters when it is long) and is attributed
    # to this file.
    short = upper_char + '(?i)'
    lengthy = short + '.?' * 100
    quoted_cases = (
        (short,
         'Flags not at the start of the expression %r' % short),
        (lengthy,
         'Flags not at the start of the expression %r (truncated)'
         % lengthy[:20]),
    )
    for p, expected_message in quoted_cases:
        with self.assertWarns(DeprecationWarning) as warns:
            self.assertTrue(re.match(p, lower_char))
        first_warning = warns.warnings[0]
        self.assertEqual(str(first_warning.message), expected_message)
        self.assertEqual(first_warning.filename, __file__)

    # bpo-30605: Compiling a bytes instance regex was throwing a BytesWarning
    with warnings.catch_warnings():
        warnings.simplefilter('error', BytesWarning)
        p = b'A(?i)'
        with self.assertWarns(DeprecationWarning) as warns:
            self.assertTrue(re.match(p, b'a'))
        first_warning = warns.warnings[0]
        self.assertEqual(
            str(first_warning.message),
            'Flags not at the start of the expression %r' % p
        )
        self.assertEqual(first_warning.filename, __file__)

    # Other placements that only need to warn.
    warning_only_cases = (
        ('(?s).(?i)' + upper_char, '\n' + lower_char),
        ('(?i) ' + upper_char + ' (?x)', lower_char),
        (' (?x) (?i) ' + upper_char, lower_char),
        ('^(?i)' + upper_char, lower_char),
        ('$|(?i)' + upper_char, lower_char),
    )
    for pattern, subject in warning_only_cases:
        with self.assertWarns(DeprecationWarning):
            self.assertTrue(re.match(pattern, subject))

    # Flags nested inside groups and conditionals: the warning text and
    # the reported filename are checked as well.
    nested_cases = (
        (re.match, '(?:(?i)' + upper_char + ')'),
        (re.fullmatch, '(^)?(?(1)(?i)' + upper_char + ')'),
        (re.fullmatch, '($)?(?(1)|(?i)' + upper_char + ')'),
    )
    for matcher, pattern in nested_cases:
        with self.assertWarns(DeprecationWarning) as warns:
            self.assertTrue(matcher(pattern, lower_char))
        first_warning = warns.warnings[0]
        self.assertRegex(str(first_warning.message),
                         'Flags not at the start')
        self.assertEqual(first_warning.filename, __file__)
Serhiy Storchaka418d60a2017-05-10 06:44:02 +03001401
1402
Christian Heimes25bb7832008-01-11 16:17:00 +00001403 def test_dollar_matches_twice(self):
1404 "$ matches the end of string, and just before the terminating \n"
1405 pattern = re.compile('$')
1406 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
1407 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
1408 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1409
1410 pattern = re.compile('$', re.MULTILINE)
1411 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
1412 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
1413 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1414
Antoine Pitroufd036452008-08-19 17:56:33 +00001415 def test_bytes_str_mixing(self):
1416 # Mixing str and bytes is disallowed
1417 pat = re.compile('.')
1418 bpat = re.compile(b'.')
1419 self.assertRaises(TypeError, pat.match, b'b')
1420 self.assertRaises(TypeError, bpat.match, 'b')
1421 self.assertRaises(TypeError, pat.sub, b'b', 'c')
1422 self.assertRaises(TypeError, pat.sub, 'b', b'c')
1423 self.assertRaises(TypeError, pat.sub, b'b', b'c')
1424 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
1425 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
1426 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
1427
def test_ascii_and_unicode_flag(self):
    # String patterns
    for flags in (0, re.UNICODE):
        for source, extra in (('\xc0', re.IGNORECASE), (r'\w', 0)):
            pat = re.compile(source, flags | extra)
            self.assertTrue(pat.match('\xe0'))
    ascii_only = (
        ('\xc0', re.ASCII | re.IGNORECASE),
        ('(?a)\xc0', re.IGNORECASE),
        (r'\w', re.ASCII),
        (r'(?a)\w', 0),
    )
    for source, flags in ascii_only:
        pat = re.compile(source, flags)
        self.assertIsNone(pat.match('\xe0'))
    # Bytes patterns
    for flags in (0, re.ASCII):
        for source, extra in ((b'\xc0', re.IGNORECASE), (br'\w', 0)):
            pat = re.compile(source, flags | extra)
            self.assertIsNone(pat.match(b'\xe0'))
    # Incompatibilities
    conflicting = (
        (br'\w', re.UNICODE),
        (br'(?u)\w',),
        (r'\w', re.UNICODE | re.ASCII),
        (r'(?u)\w', re.ASCII),
        (r'(?a)\w', re.UNICODE),
        (r'(?au)\w',),
    )
    for compile_args in conflicting:
        self.assertRaises(ValueError, re.compile, *compile_args)
Antoine Pitroufd036452008-08-19 17:56:33 +00001456
def test_locale_flag(self):
    import locale
    _, enc = locale.getlocale(locale.LC_CTYPE)

    def find_cased_letter():
        # Search non-ASCII letter: a byte that decodes to a character
        # whose lowercase form is a different, single-byte character that
        # round-trips through the encoding.  Returns (pattern, lowercase
        # byte), or (b'A', None) when no such byte exists.
        for i in range(128, 256):
            try:
                c = bytes([i]).decode(enc)
                sletter = c.lower()
                if sletter == c:
                    continue
                candidate = sletter.encode(enc)
                if len(candidate) != 1:
                    continue
                if candidate.decode(enc) != sletter:
                    continue
                return re.escape(bytes([i])), candidate
            except (UnicodeError, TypeError):
                pass
        return b'A', None

    bpat, bletter = find_cased_letter()

    # Bytes patterns: each one must compile; the match is only checked
    # when a suitable letter was found.
    bytes_cases = (
        (bpat, re.LOCALE | re.IGNORECASE, True),
        (b'(?L)' + bpat, re.IGNORECASE, True),
        (bpat, re.IGNORECASE, False),
        (br'\w', re.LOCALE, True),
        (br'(?L)\w', 0, True),
        (br'\w', 0, False),
    )
    for source, flags, should_match in bytes_cases:
        pat = re.compile(source, flags)
        if not bletter:
            continue
        if should_match:
            self.assertTrue(pat.match(bletter))
        else:
            self.assertIsNone(pat.match(bletter))

    # Incompatibilities
    conflicting = (
        ('', re.LOCALE),
        ('(?L)',),
        (b'', re.LOCALE | re.ASCII),
        (b'(?L)', re.ASCII),
        (b'(?a)', re.LOCALE),
        (b'(?aL)',),
    )
    for compile_args in conflicting:
        self.assertRaises(ValueError, re.compile, *compile_args)
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001502
def test_scoped_flags(self):
    # (pattern, subject, flags, whether a match is expected)
    match_cases = (
        # IGNORECASE switched on or off for part of the pattern
        (r'(?i:a)b', 'Ab', 0, True),
        (r'(?i:a)b', 'aB', 0, False),
        (r'(?-i:a)b', 'Ab', re.IGNORECASE, False),
        (r'(?-i:a)b', 'aB', re.IGNORECASE, True),
        (r'(?i:(?-i:a)b)', 'Ab', 0, False),
        (r'(?i:(?-i:a)b)', 'aB', 0, True),
        # VERBOSE switched on or off for part of the pattern
        (r'(?x: a) b', 'a b', 0, True),
        (r'(?x: a) b', ' a b', 0, False),
        (r'(?-x: a) b', ' ab', re.VERBOSE, True),
        (r'(?-x: a) b', 'ab', re.VERBOSE, False),
    )
    for pattern, subject, flags, should_match in match_cases:
        result = re.match(pattern, subject, flags)
        if should_match:
            self.assertTrue(result)
        else:
            self.assertIsNone(result)

    # (pattern, expected message, expected position)
    error_cases = (
        (r'(?a:\w)', 'bad inline flags: cannot turn on global flag', 3),
        (r'(?a)(?-a:\w)',
         'bad inline flags: cannot turn off global flag', 8),
        (r'(?i-i:a)', 'bad inline flags: flag turned on and off', 5),

        (r'(?-', 'missing flag', 3),
        (r'(?-+', 'missing flag', 3),
        (r'(?-z', 'unknown flag', 3),
        (r'(?-i', 'missing :', 4),
        (r'(?-i)', 'missing :', 4),
        (r'(?-i+', 'missing :', 4),
        (r'(?-iz', 'unknown flag', 4),
        (r'(?i:', 'missing ), unterminated subpattern', 0),
        (r'(?i', 'missing -, : or )', 3),
        (r'(?i+', 'missing -, : or )', 3),
        (r'(?iz', 'unknown flag', 3),
    )
    for pattern, message, position in error_cases:
        self.checkPatternError(pattern, message, position)
1534
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001535 def test_bug_6509(self):
1536 # Replacement strings of both types must parse properly.
1537 # all strings
R David Murray44b548d2016-09-08 13:59:53 -04001538 pat = re.compile(r'a(\w)')
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001539 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
1540 pat = re.compile('a(.)')
1541 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
1542 pat = re.compile('..')
1543 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
1544
1545 # all bytes
R David Murray44b548d2016-09-08 13:59:53 -04001546 pat = re.compile(br'a(\w)')
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001547 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
1548 pat = re.compile(b'a(.)')
1549 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
1550 pat = re.compile(b'..')
1551 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
1552
def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both. For smaller values
    # a RuntimeError is raised instead of OverflowError.
    long_overflow = 2**128
    with self.assertRaises(TypeError):
        re.finditer("a", {})
    self.assertRaises(OverflowError,
                      _sre.compile, "abc", 0, [long_overflow], 0, [], [])
    self.assertRaises(TypeError,
                      _sre.compile, {}, 0, [], 0, [], [])
Christian Heimes072c0f12008-01-03 23:01:04 +00001566
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001567 def test_search_dot_unicode(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001568 self.assertTrue(re.search("123.*-", '123abc-'))
1569 self.assertTrue(re.search("123.*-", '123\xe9-'))
1570 self.assertTrue(re.search("123.*-", '123\u20ac-'))
1571 self.assertTrue(re.search("123.*-", '123\U0010ffff-'))
1572 self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001573
def test_compile(self):
    # Test return value when given string and pattern as parameter
    compiled = re.compile('random pattern')
    self.assertIsInstance(compiled, re._pattern_type)
    # Compiling an already compiled pattern hands back the same object.
    recompiled = re.compile(compiled)
    self.assertIsInstance(recompiled, re._pattern_type)
    self.assertIs(recompiled, compiled)
    # Test behaviour when not given a string or pattern as parameter
    with self.assertRaises(TypeError):
        re.compile(0)
1583
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    haystack = 'a' * size
    at_end = re.search('$', haystack)
    self.assertIsNotNone(at_end)
    # The empty match must be reported at the real end of the string.
    self.assertEqual(at_end.start(), size)
    self.assertEqual(at_end.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001592
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    text = 'a' * size
    replaced, count = re.subn('', '', text)
    self.assertEqual(replaced, text)
    # One empty match before each character plus one at the end.
    self.assertEqual(count, size + 1)
1602
Serhiy Storchakac1b59d42012-12-29 23:38:48 +02001603 def test_bug_16688(self):
1604 # Issue 16688: Backreferences make case-insensitive regex fail on
1605 # non-ASCII strings.
1606 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1607 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001608
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001609 def test_repeat_minmax_overflow(self):
1610 # Issue #13169
1611 string = "x" * 100000
1612 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1613 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1614 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1615 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1616 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1617 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1618 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1619 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1620 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1621 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1622 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1623
1624 @cpython_only
1625 def test_repeat_minmax_overflow_maxrepeat(self):
1626 try:
1627 from _sre import MAXREPEAT
1628 except ImportError:
1629 self.skipTest('requires _sre.MAXREPEAT constant')
1630 string = "x" * 100000
1631 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1632 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1633 (0, 100000))
1634 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1635 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1636 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1637 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1638
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The message must quote the offending name of the named backreference.
    pattern = '(?P=<foo>)'
    expected_message = "bad character in group name '<foo>'"
    self.checkPatternError(pattern, expected_message, 4)
R David Murray26dfaac92013-04-14 13:00:54 -04001643
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The message must quote the offending name of the group definition.
    pattern = '(?P<?foo>)'
    expected_message = "bad character in group name '?foo'"
    self.checkPatternError(pattern, expected_message, 4)
R David Murray26dfaac92013-04-14 13:00:54 -04001648
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001649 def test_issue17998(self):
1650 for reps in '*', '+', '?', '{1}':
1651 for mod in '', '?':
1652 pattern = '.' + reps + mod + 'yz'
1653 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1654 ['xyz'], msg=pattern)
1655 pattern = pattern.encode()
1656 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1657 [b'xyz'], msg=pattern)
1658
def test_match_repr(self):
    def expected_repr(match, details):
        # Take module and qualified name from the match being checked, so
        # an expectation never mixes attributes of two different objects.
        kind = type(match)
        return "<%s.%s object; %s>" % (
            kind.__module__, kind.__qualname__, details)

    # str subjects; S is the str subclass defined at the top of this file.
    for string in '[abracadabra]', S('[abracadabra]'):
        m = re.search(r'(.+)(.*?)\1', string)
        self.assertEqual(
            repr(m),
            expected_repr(m, "span=(1, 12), match='abracadabra'"))
    # bytes-like subjects; B is the bytes subclass defined at the top of
    # this file.
    for string in (b'[abracadabra]', B(b'[abracadabra]'),
                   bytearray(b'[abracadabra]'),
                   memoryview(b'[abracadabra]')):
        m = re.search(br'(.+)(.*?)\1', string)
        self.assertEqual(
            repr(m),
            expected_repr(m, "span=(1, 12), match=b'abracadabra'"))

    # Matches produced by finditer().
    first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
    self.assertEqual(repr(first),
                     expected_repr(first, "span=(0, 2), match='aa'"))
    self.assertEqual(repr(second),
                     expected_repr(second, "span=(3, 5), match='bb'"))
1680
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001681
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001682 def test_bug_2537(self):
1683 # issue 2537: empty submatches
1684 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1685 for inner_op in ('{0,}', '*', '?'):
1686 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1687 m = r.match("xyyzy")
1688 self.assertEqual(m.group(0), "xyy")
1689 self.assertEqual(m.group(1), "")
1690 self.assertEqual(m.group(2), "y")
1691
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001692 def test_debug_flag(self):
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001693 pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001694 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001695 re.compile(pat, re.DEBUG)
1696 dump = '''\
Serhiy Storchakabe9a4e52016-09-10 00:57:55 +03001697SUBPATTERN 1 0 0
Serhiy Storchakac7f7d382014-11-09 20:48:36 +02001698 LITERAL 46
Serhiy Storchakabe9a4e52016-09-10 00:57:55 +03001699SUBPATTERN None 0 0
Serhiy Storchakac7f7d382014-11-09 20:48:36 +02001700 BRANCH
1701 IN
1702 LITERAL 99
1703 LITERAL 104
1704 OR
1705 LITERAL 112
1706 LITERAL 121
Serhiy Storchakabe9a4e52016-09-10 00:57:55 +03001707SUBPATTERN None 0 0
Serhiy Storchakac7f7d382014-11-09 20:48:36 +02001708 GROUPREF_EXISTS 1
1709 AT AT_END
1710 ELSE
1711 LITERAL 58
1712 LITERAL 32
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001713'''
1714 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001715 # Debug output is output again even a second time (bypassing
1716 # the cache -- issue #20426).
1717 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001718 re.compile(pat, re.DEBUG)
1719 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001720
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001721 def test_keyword_parameters(self):
1722 # Issue #20283: Accepting the string keyword parameter.
1723 pat = re.compile(r'(ab)')
1724 self.assertEqual(
1725 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1726 self.assertEqual(
Serhiy Storchakaa537eb42014-03-06 11:36:15 +02001727 pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
1728 self.assertEqual(
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001729 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1730 self.assertEqual(
1731 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1732 self.assertEqual(
1733 pat.split(string='abracadabra', maxsplit=1),
1734 ['', 'ab', 'racadabra'])
1735 self.assertEqual(
1736 pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
1737 (7, 9))
1738
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001739 def test_bug_20998(self):
1740 # Issue #20998: Fullmatch of repeated single character pattern
1741 # with ignore case.
1742 self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
1743
Serhiy Storchaka4659cc02014-10-31 00:53:49 +02001744 def test_locale_caching(self):
1745 # Issue #22410
1746 oldlocale = locale.setlocale(locale.LC_CTYPE)
1747 self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
1748 for loc in 'en_US.iso88591', 'en_US.utf8':
1749 try:
1750 locale.setlocale(locale.LC_CTYPE, loc)
1751 except locale.Error:
1752 # Unsupported locale on this system
1753 self.skipTest('test needs %s locale' % loc)
1754
1755 re.purge()
1756 self.check_en_US_iso88591()
1757 self.check_en_US_utf8()
1758 re.purge()
1759 self.check_en_US_utf8()
1760 self.check_en_US_iso88591()
1761
1762 def check_en_US_iso88591(self):
1763 locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
1764 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1765 self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
1766 self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
1767 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1768 self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
1769 self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
1770
1771 def check_en_US_utf8(self):
1772 locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
1773 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1774 self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
1775 self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
1776 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1777 self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
1778 self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
1779
Serhiy Storchakaad446d52014-11-10 13:49:00 +02001780 def test_error(self):
1781 with self.assertRaises(re.error) as cm:
1782 re.compile('(\u20ac))')
1783 err = cm.exception
1784 self.assertIsInstance(err.pattern, str)
1785 self.assertEqual(err.pattern, '(\u20ac))')
1786 self.assertEqual(err.pos, 3)
1787 self.assertEqual(err.lineno, 1)
1788 self.assertEqual(err.colno, 4)
1789 self.assertIn(err.msg, str(err))
1790 self.assertIn(' at position 3', str(err))
1791 self.assertNotIn(' at position 3', err.msg)
1792 # Bytes pattern
1793 with self.assertRaises(re.error) as cm:
1794 re.compile(b'(\xa4))')
1795 err = cm.exception
1796 self.assertIsInstance(err.pattern, bytes)
1797 self.assertEqual(err.pattern, b'(\xa4))')
1798 self.assertEqual(err.pos, 3)
1799 # Multiline pattern
1800 with self.assertRaises(re.error) as cm:
1801 re.compile("""
1802 (
1803 abc
1804 )
1805 )
1806 (
1807 """, re.VERBOSE)
1808 err = cm.exception
1809 self.assertEqual(err.pos, 77)
1810 self.assertEqual(err.lineno, 5)
1811 self.assertEqual(err.colno, 17)
1812 self.assertIn(err.msg, str(err))
1813 self.assertIn(' at position 77', str(err))
1814 self.assertIn('(line 5, column 17)', str(err))
1815
Serhiy Storchaka632a77e2015-03-25 21:03:47 +02001816 def test_misc_errors(self):
1817 self.checkPatternError(r'(', 'missing ), unterminated subpattern', 0)
1818 self.checkPatternError(r'((a|b)', 'missing ), unterminated subpattern', 0)
1819 self.checkPatternError(r'(a|b))', 'unbalanced parenthesis', 5)
1820 self.checkPatternError(r'(?P', 'unexpected end of pattern', 3)
1821 self.checkPatternError(r'(?z)', 'unknown extension ?z', 1)
1822 self.checkPatternError(r'(?iz)', 'unknown flag', 3)
Serhiy Storchakabe9a4e52016-09-10 00:57:55 +03001823 self.checkPatternError(r'(?i', 'missing -, : or )', 3)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +02001824 self.checkPatternError(r'(?#abc', 'missing ), unterminated comment', 0)
1825 self.checkPatternError(r'(?<', 'unexpected end of pattern', 3)
1826 self.checkPatternError(r'(?<>)', 'unknown extension ?<>', 1)
1827 self.checkPatternError(r'(?', 'unexpected end of pattern', 2)
1828
Victor Stinner8bf43e62016-11-14 12:38:43 +01001829 def test_enum(self):
1830 # Issue #28082: Check that str(flag) returns a human readable string
1831 # instead of an integer
1832 self.assertIn('ASCII', str(re.A))
1833 self.assertIn('DOTALL', str(re.S))
1834
Victor Stinnerb44fb122016-11-21 16:35:08 +01001835 def test_pattern_compare(self):
1836 pattern1 = re.compile('abc', re.IGNORECASE)
1837
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01001838 # equal to itself
1839 self.assertEqual(pattern1, pattern1)
1840 self.assertFalse(pattern1 != pattern1)
1841
Victor Stinnerb44fb122016-11-21 16:35:08 +01001842 # equal
1843 re.purge()
1844 pattern2 = re.compile('abc', re.IGNORECASE)
1845 self.assertEqual(hash(pattern2), hash(pattern1))
1846 self.assertEqual(pattern2, pattern1)
1847
1848 # not equal: different pattern
1849 re.purge()
1850 pattern3 = re.compile('XYZ', re.IGNORECASE)
1851 # Don't test hash(pattern3) != hash(pattern1) because there is no
1852 # warranty that hash values are different
1853 self.assertNotEqual(pattern3, pattern1)
1854
1855 # not equal: different flag (flags=0)
1856 re.purge()
1857 pattern4 = re.compile('abc')
1858 self.assertNotEqual(pattern4, pattern1)
1859
1860 # only == and != comparison operators are supported
1861 with self.assertRaises(TypeError):
1862 pattern1 < pattern2
1863
1864 def test_pattern_compare_bytes(self):
1865 pattern1 = re.compile(b'abc')
1866
1867 # equal: test bytes patterns
1868 re.purge()
1869 pattern2 = re.compile(b'abc')
1870 self.assertEqual(hash(pattern2), hash(pattern1))
1871 self.assertEqual(pattern2, pattern1)
1872
1873 # not equal: pattern of a different types (str vs bytes),
1874 # comparison must not raise a BytesWarning
1875 re.purge()
1876 pattern3 = re.compile('abc')
1877 with warnings.catch_warnings():
1878 warnings.simplefilter('error', BytesWarning)
1879 self.assertNotEqual(pattern3, pattern1)
1880
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001881 def test_bug_29444(self):
1882 s = bytearray(b'abcdefgh')
1883 m = re.search(b'[a-h]+', s)
1884 m2 = re.search(b'[e-h]+', s)
1885 self.assertEqual(m.group(), b'abcdefgh')
1886 self.assertEqual(m2.group(), b'efgh')
1887 s[:] = b'xyz'
1888 self.assertEqual(m.group(), b'xyz')
1889 self.assertEqual(m2.group(), b'')
1890
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02001891
class PatternReprTests(unittest.TestCase):
    """Tests for repr() of compiled pattern objects."""

    def check(self, pattern, expected):
        # Compile without explicit flags and compare the repr.
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        # Compile with the given flags and compare the repr.
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        self.check('random pattern', "re.compile('random pattern')")

    def test_single_flag(self):
        expected = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, expected)

    def test_multiple_flags(self):
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', re.I|re.S|re.X, expected)

    def test_unicode_flag(self):
        # re.U is expected to be left out of the repr.
        cases = (
            (re.U, "re.compile('random pattern')"),
            (re.I|re.S|re.U,
             "re.compile('random pattern', "
             "re.IGNORECASE|re.DOTALL)"),
        )
        for flags, expected in cases:
            self.check_flags('random pattern', flags, expected)

    def test_inline_flags(self):
        # A flag set inside the pattern shows up in the repr as well.
        expected = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', expected)

    def test_unknown_flags(self):
        # Bits without a flag name are expected to be shown in hexadecimal.
        cases = (
            (0x123000, "re.compile('random pattern', 0x123000)"),
            (0x123000|re.I,
             "re.compile('random pattern', re.IGNORECASE|0x123000)"),
        )
        for flags, expected in cases:
            self.check_flags('random pattern', flags, expected)

    def test_bytes(self):
        self.check(b'bytes pattern', "re.compile(b'bytes pattern')")
        self.check_flags(b'bytes pattern', re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_locale(self):
        expected = "re.compile(b'bytes pattern', re.LOCALE)"
        self.check_flags(b'bytes pattern', re.L, expected)

    def test_quotes(self):
        # Each row is (pattern, expected repr).
        cases = (
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        )
        for pattern, expected in cases:
            self.check(pattern, expected)

    def test_long_pattern(self):
        # The repr of a very long pattern must stay under 300 characters
        # while keeping the start of the pattern and any flags.
        pattern = 'Very %spattern' % ('long ' * 1000)
        head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], head)

        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")
1956
1957
class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        # Each row is (prefix, overlap table expected for that prefix).
        overlap = sre_compile._generate_overlap_table
        expectations = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, table in expectations:
            self.assertEqual(overlap(prefix), table)
1971
1972
class ExternalTests(unittest.TestCase):
    """Run the data-driven cases that are stored in test.re_tests."""

    def test_re_benchmarks(self):
        're_tests benchmarks'
        from test.re_tests import benchmarks
        for pattern, s in benchmarks:
            with self.subTest(pattern=pattern, string=s):
                p = re.compile(pattern)
                self.assertTrue(p.search(s))
                self.assertTrue(p.match(s))
                self.assertTrue(p.fullmatch(s))
                # Same checks with the subject buried in padding, addressed
                # through the pos/endpos arguments.
                s2 = ' '*10000 + s + ' '*10000
                self.assertTrue(p.search(s2))
                self.assertTrue(p.match(s2, 10000))
                self.assertTrue(p.match(s2, 10000, 10000 + len(s)))
                self.assertTrue(p.fullmatch(s2, 10000, 10000 + len(s)))

    def test_re_tests(self):
        're_tests test suite'
        from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
        for t in tests:
            pattern = s = outcome = repl = expected = None
            if len(t) == 5:
                pattern, s, outcome, repl, expected = t
            elif len(t) == 3:
                pattern, s, outcome = t
            else:
                raise ValueError('Test tuples should have 3 or 5 fields', t)

            with self.subTest(pattern=pattern, string=s):
                if outcome == SYNTAX_ERROR:  # Expected a syntax error
                    with self.assertRaises(re.error):
                        re.compile(pattern)
                    continue

                obj = re.compile(pattern)
                result = obj.search(s)
                if outcome == FAIL:
                    self.assertIsNone(result, 'Succeeded incorrectly')
                    continue

                with self.subTest():
                    self.assertTrue(result, 'Failed incorrectly')
                    # Matched, as expected, so now we compute the
                    # result string and compare it to our expected result.
                    start, end = result.span(0)

                    def group_text(key):
                        # Missing groups become "Error" and unmatched ones
                        # "None" (a string, because the expression in repl
                        # concatenates strings).
                        try:
                            value = result.group(key)
                        except IndexError:
                            return "Error"
                        return "None" if value is None else value

                    vardict = {'found': result.group(0),
                               'groups': result.group(),
                               'flags': result.re.flags}
                    for i in range(1, 100):
                        vardict['g%d' % i] = group_text(i)
                    for name in result.re.groupindex:
                        vardict[name] = group_text(name)
                    # repl is an expression from the test data table,
                    # evaluated against the names collected above.
                    self.assertEqual(eval(repl, vardict), expected,
                                     'grouping error')

                # Try the match with both pattern and string converted to
                # bytes, and check that it still succeeds.
                try:
                    bpat = bytes(pattern, "ascii")
                    bs = bytes(s, "ascii")
                except UnicodeEncodeError:
                    # skip non-ascii tests
                    pass
                else:
                    with self.subTest('bytes pattern match'):
                        obj = re.compile(bpat)
                        self.assertTrue(obj.search(bs))

                    # Try the match with LOCALE enabled, and check that it
                    # still succeeds.  A miss is only reported, not failed.
                    with self.subTest('locale-sensitive match'):
                        obj = re.compile(bpat, re.LOCALE)
                        # Kept in its own variable: "result" must keep
                        # referring to the str match that produced
                        # start/end, because the guard below relies on it.
                        locale_result = obj.search(bs)
                        if locale_result is None:
                            print('=== Fails on locale-sensitive match', t)

                # Try the match with the search area limited to the extent
                # of the match and see if it still succeeds.  \B will
                # break (because it won't match at the end or start of a
                # string), so we'll ignore patterns that feature it.
                if (pattern[:2] != r'\B' and pattern[-2:] != r'\B'
                        and result is not None):
                    with self.subTest('range-limited match'):
                        obj = re.compile(pattern)
                        self.assertTrue(obj.search(s, start, end + 1))

                # Try the match with IGNORECASE enabled, and check that it
                # still succeeds.
                with self.subTest('case-insensitive match'):
                    obj = re.compile(pattern, re.IGNORECASE)
                    self.assertTrue(obj.search(s))

                # Try the match with UNICODE locale enabled, and check
                # that it still succeeds.
                with self.subTest('unicode-sensitive match'):
                    obj = re.compile(pattern, re.UNICODE)
                    self.assertTrue(obj.search(s))
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00002084
Gregory P. Smith5a631832010-07-27 05:31:29 +00002085
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()