blob: 4231e962b23e8a4fa4b801f1619ebcaaea8f6f9d [file] [log] [blame]
Victor Stinnerd6debb22017-03-27 16:05:26 +02001from test.support import (gc_collect, bigmemtest, _2G,
Erlend Egeberg Aasland0a3452e2021-06-24 01:46:25 +02002 cpython_only, captured_stdout,
3 check_disallow_instantiation)
Serhiy Storchaka4659cc02014-10-31 00:53:49 +02004import locale
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02006import sre_compile
Ezio Melottid2114eb2011-03-25 14:08:44 +02007import string
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02008import unittest
Victor Stinnerb44fb122016-11-21 16:35:08 +01009import warnings
10from re import Scanner
Raymond Hettinger027bb632004-05-31 03:09:25 +000011from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000012
Guido van Rossum23b22571997-07-17 22:36:14 +000013# Misc tests from Tim Peters' re.doc
14
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
18
class S(str):
    """str subclass whose indexing/slicing results are wrapped back into S."""

    def __getitem__(self, index):
        # Delegate to str, then re-wrap so the subclass type survives slicing.
        piece = super().__getitem__(index)
        return S(piece)
22
class B(bytes):
    """bytes subclass whose indexing/slicing results are wrapped back into B."""

    def __getitem__(self, index):
        # Delegate to bytes, then re-wrap the result in B.
        piece = super().__getitem__(index)
        return B(piece)
26
Skip Montanaro8ed06da2003-04-24 19:43:18 +000027class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000028
Serhiy Storchaka25324972013-10-16 12:46:28 +030029 def assertTypedEqual(self, actual, expect, msg=None):
30 self.assertEqual(actual, expect, msg)
31 def recurse(actual, expect):
32 if isinstance(expect, (tuple, list)):
33 for x, y in zip(actual, expect):
34 recurse(x, y)
35 else:
36 self.assertIs(type(actual), type(expect), msg)
37 recurse(actual, expect)
38
Serhiy Storchaka632a77e2015-03-25 21:03:47 +020039 def checkPatternError(self, pattern, errmsg, pos=None):
40 with self.assertRaises(re.error) as cm:
41 re.compile(pattern)
42 with self.subTest(pattern=pattern):
43 err = cm.exception
44 self.assertEqual(err.msg, errmsg)
45 if pos is not None:
46 self.assertEqual(err.pos, pos)
47
48 def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
49 with self.assertRaises(re.error) as cm:
50 re.sub(pattern, repl, string)
51 with self.subTest(pattern=pattern, repl=repl):
52 err = cm.exception
53 self.assertEqual(err.msg, errmsg)
54 if pos is not None:
55 self.assertEqual(err.pos, pos)
56
    def test_keep_buffer(self):
        # See bug 14212
        # While the finditer() iterator over the bytearray exists, resizing
        # the bytearray is expected to fail with BufferError.
        b = bytearray(b'x')
        it = re.finditer(b'a', b)
        with self.assertRaises(BufferError):
            b.extend(b'x'*400)
        list(it)
        # Once the iterator is dropped and collected, the same resize is
        # expected to succeed (no exception).
        del it
        gc_collect()
        b.extend(b'x'*400)
67
Raymond Hettinger027bb632004-05-31 03:09:25 +000068 def test_weakref(self):
69 s = 'QabbbcR'
70 x = re.compile('ab+c')
71 y = proxy(x)
72 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
73
Skip Montanaro8ed06da2003-04-24 19:43:18 +000074 def test_search_star_plus(self):
75 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
76 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
77 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
78 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030079 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000080 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
81 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
82 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
83 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030084 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000085
Skip Montanaro8ed06da2003-04-24 19:43:18 +000086 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000087 int_value = int(matchobj.group(0))
88 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000089
Skip Montanaro8ed06da2003-04-24 19:43:18 +000090 def test_basic_re_sub(self):
Serhiy Storchaka25324972013-10-16 12:46:28 +030091 self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
92 self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
93 self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
94 self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
95 self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
96 self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030097 for y in ("\xe0", "\u0430", "\U0001d49c"):
98 self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
Serhiy Storchaka25324972013-10-16 12:46:28 +030099
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000100 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
101 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
102 '9.3 -3 24x100y')
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300103 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
104 '9.3 -3 23x99y')
Victor Stinner55e614a2014-10-29 16:58:59 +0100105 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000106 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000107
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000108 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
109 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +0000110
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000111 s = r"\1\1"
112 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
Serhiy Storchakab748e3b2017-12-12 19:21:50 +0200113 self.assertEqual(re.sub('(.)', s.replace('\\', r'\\'), 'x'), s)
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000114 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +0000115
R David Murray44b548d2016-09-08 13:59:53 -0400116 self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
117 self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
118 self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
119 self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +0000120
Serhiy Storchakaa54aae02015-03-24 22:58:14 +0200121 self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
122 self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
123 self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
124 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
125 for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
126 with self.subTest(c):
Serhiy Storchaka9bd85b82016-06-11 19:15:00 +0300127 with self.assertRaises(re.error):
Serhiy Storchakaa54aae02015-03-24 22:58:14 +0200128 self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
Guido van Rossum95e80531997-08-13 22:34:14 +0000129
R David Murray44b548d2016-09-08 13:59:53 -0400130 self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +0000131
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000132 def test_bug_449964(self):
133 # fails for group followed by other escape
R David Murray44b548d2016-09-08 13:59:53 -0400134 self.assertEqual(re.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000135 'xx\bxx\b')
136
137 def test_bug_449000(self):
138 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000139 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
140 'abc\ndef\n')
141 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
142 'abc\ndef\n')
143 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
144 'abc\ndef\n')
145 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
146 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +0000147
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000148 def test_bug_1661(self):
149 # Verify that flags do not get silently ignored with compiled patterns
150 pattern = re.compile('.')
151 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
152 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
153 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
154 self.assertRaises(ValueError, re.compile, pattern, re.I)
155
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000156 def test_bug_3629(self):
157 # A regex that triggered a bug in the sre-code validator
158 re.compile("(?P<quote>)(?(quote))")
159
    def test_sub_template_numeric_escape(self):
        # bug 776311 and friends
        # Templates starting with \0 are expected to be octal escapes; a
        # non-octal digit (8, 9) or letter ends the escape and is kept as
        # literal text.
        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
        self.assertEqual(re.sub('x', r'\377', 'x'), '\377')

        # At most three octal digits are consumed; the fourth is literal.
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

        # Three-digit octal values above 0o377 are expected to be rejected.
        self.checkTemplateError('x', r'\400', 'x',
                                r'octal escape value \400 outside of '
                                r'range 0-0o377', 0)
        self.checkTemplateError('x', r'\777', 'x',
                                r'octal escape value \777 outside of '
                                r'range 0-0o377', 0)

        # The pattern 'x' has no groups, so every group reference below is
        # expected to be invalid (error position 1, just after the backslash).
        self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1)
        self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1)
        self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1)
        self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1)
        self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1)
        self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1)
        self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1)
        self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1)
        self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1)
        self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1)
        self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1)
        self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1)
        # The template error is expected even when the subject is empty.
        self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1)

        # in python2.3 (etc), these loop endlessly in sre_parser.py
        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                         'xz8')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                         'xza')
207
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000208 def test_qualified_re_sub(self):
209 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300210 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Victor Stinner55e614a2014-10-29 16:58:59 +0100211 self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000212
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000213 def test_bug_114660(self):
214 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
215 'hello there')
216
    def test_symbolic_groups(self):
        # Valid uses of named groups: definition, (?P=name) backreference
        # and (?(name)...) conditional.  These must compile without error.
        re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
        re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
        re.compile(r'(?P<a1>x)\1(?(1)y)')
        # Invalid uses: each call checks the error message and, where a
        # trailing integer is given, the reported error position.
        self.checkPatternError(r'(?P<a>)(?P<a>)',
                               "redefinition of group name 'a' as group 2; "
                               "was group 1")
        self.checkPatternError(r'(?P<a>(?P=a))',
                               "cannot refer to an open group", 10)
        self.checkPatternError(r'(?Pxy)', 'unknown extension ?Px')
        self.checkPatternError(r'(?P<a>)(?P=a', 'missing ), unterminated name', 11)
        self.checkPatternError(r'(?P=', 'missing group name', 4)
        self.checkPatternError(r'(?P=)', 'missing group name', 4)
        self.checkPatternError(r'(?P=1)', "bad character in group name '1'", 4)
        self.checkPatternError(r'(?P=a)', "unknown group name 'a'")
        self.checkPatternError(r'(?P=a1)', "unknown group name 'a1'")
        self.checkPatternError(r'(?P=a.)', "bad character in group name 'a.'", 4)
        self.checkPatternError(r'(?P<)', 'missing >, unterminated name', 4)
        self.checkPatternError(r'(?P<a', 'missing >, unterminated name', 4)
        self.checkPatternError(r'(?P<', 'missing group name', 4)
        self.checkPatternError(r'(?P<>)', 'missing group name', 4)
        self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4)
        self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4)
        self.checkPatternError(r'(?(', 'missing group name', 3)
        self.checkPatternError(r'(?())', 'missing group name', 3)
        self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3)
        self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
        self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
        self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
        # New valid/invalid identifiers in Python 3
        re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
        re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
        self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
        # Support > 100 groups.
        # Builds 200 alternatives 'x(?P<aN>HEX(N))y'; the subject 'xc8yz'
        # selects N == 200 (0xc8), so the conditional on group 200 takes
        # the 'z' branch.
        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
        pat = '(?:%s)(?(200)z|t)' % pat
        self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200254
    def test_symbolic_refs(self):
        # Malformed or invalid \g<...> references in the replacement
        # template: each call checks the error message and position.
        self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
                                'missing >, unterminated name', 3)
        self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
                                'missing group name', 3)
        self.checkTemplateError('(?P<a>x)', r'\g', 'xx', 'missing <', 2)
        self.checkTemplateError('(?P<a>x)', r'\g<a a>', 'xx',
                                "bad character in group name 'a a'", 3)
        self.checkTemplateError('(?P<a>x)', r'\g<>', 'xx',
                                'missing group name', 3)
        self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
                                "bad character in group name '1a1'", 3)
        self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
                                'invalid group reference 2', 3)
        self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
                                'invalid group reference 2', 1)
        # A well-formed but unknown group name is expected to raise
        # IndexError rather than re.error.
        with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
            re.sub('(?P<a>x)', r'\g<ab>', 'xx')
        # A reference to a group that exists but did not participate in the
        # match is expected to expand to the empty string.
        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
        self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
                                "bad character in group name '-1'", 3)
        # New valid/invalid identifiers in Python 3
        self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
        self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
        self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
                                "bad character in group name '©'", 3)
        # Support > 100 groups.
        # Group 200 captures 'c8' (hex of 200) in each 'xc8y' occurrence.
        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
        self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000285
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000286 def test_re_subn(self):
287 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
288 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
289 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
290 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300291 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Victor Stinner55e614a2014-10-29 16:58:59 +0100292 self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000293
    def test_re_split(self):
        # NOTE: the loop variable `string` shadows the `string` module
        # imported at the top of the file for the rest of this method.
        # str and str-subclass subjects: results must be plain str.
        for string in ":a:b::c", S(":a:b::c"):
            self.assertTypedEqual(re.split(":", string),
                                  ['', 'a', 'b', '', 'c'])
            self.assertTypedEqual(re.split(":+", string),
                                  ['', 'a', 'b', 'c'])
            self.assertTypedEqual(re.split("(:+)", string),
                                  ['', ':', 'a', ':', 'b', '::', 'c'])
        # bytes-like subjects: results must be plain bytes.
        for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                       memoryview(b":a:b::c")):
            self.assertTypedEqual(re.split(b":", string),
                                  [b'', b'a', b'b', b'', b'c'])
            self.assertTypedEqual(re.split(b":+", string),
                                  [b'', b'a', b'b', b'c'])
            self.assertTypedEqual(re.split(b"(:+)", string),
                                  [b'', b':', b'a', b':', b'b', b'::', b'c'])
        # Each three-character string unpacks into a, b, c: one triple of
        # Latin-1, one of BMP and one of non-BMP characters.
        for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                        "\U0001d49c\U0001d49e\U0001d4b5"):
            string = ":%s:%s::%s" % (a, b, c)
            self.assertEqual(re.split(":", string), ['', a, b, '', c])
            self.assertEqual(re.split(":+", string), ['', a, b, c])
            self.assertEqual(re.split("(:+)", string),
                             ['', ':', a, ':', b, '::', c])

        # Capturing groups in the separator are included in the result;
        # groups that did not participate appear as None.
        self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
        self.assertEqual(re.split("(:)+", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])

        # Separators that can match the empty string.
        for sep, expected in [
            (':*', ['', '', 'a', '', 'b', '', 'c', '']),
            ('(?::*)', ['', '', 'a', '', 'b', '', 'c', '']),
            ('(:*)', ['', ':', '', '', 'a', ':', '', '', 'b', '::', '', '', 'c', '', '']),
            ('(:)*', ['', ':', '', None, 'a', ':', '', None, 'b', ':', '', None, 'c', None, '']),
        ]:
            with self.subTest(sep=sep):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

        # Separators that only ever match the empty string.
        for sep, expected in [
            ('', ['', ':', 'a', ':', 'b', ':', ':', 'c', '']),
            (r'\b', [':', 'a', ':', 'b', '::', 'c', '']),
            (r'(?=:)', ['', ':a', ':b', ':', ':c']),
            (r'(?<=:)', [':', 'a:', 'b:', ':', 'c']),
        ]:
            with self.subTest(sep=sep):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
346
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000347 def test_qualified_re_split(self):
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300348 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
Victor Stinner55e614a2014-10-29 16:58:59 +0100349 self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
350 self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
351 self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000352 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200353 self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000354 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200355 self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
Serhiy Storchakafbb490f2018-01-04 11:06:13 +0200356 ['', ':', '', '', 'a:b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000357
    def test_re_findall(self):
        self.assertEqual(re.findall(":+", "abc"), [])
        # str and str-subclass subjects: items must be plain str (or tuples
        # of plain str when the pattern has several groups).
        for string in "a:b::c:::d", S("a:b::c:::d"):
            self.assertTypedEqual(re.findall(":+", string),
                                  [":", "::", ":::"])
            self.assertTypedEqual(re.findall("(:+)", string),
                                  [":", "::", ":::"])
            self.assertTypedEqual(re.findall("(:)(:*)", string),
                                  [(":", ""), (":", ":"), (":", "::")])
        # bytes-like subjects: items must be plain bytes.
        for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
                       memoryview(b"a:b::c:::d")):
            self.assertTypedEqual(re.findall(b":+", string),
                                  [b":", b"::", b":::"])
            self.assertTypedEqual(re.findall(b"(:+)", string),
                                  [b":", b"::", b":::"])
            self.assertTypedEqual(re.findall(b"(:)(:*)", string),
                                  [(b":", b""), (b":", b":"), (b":", b"::")])
        # Same checks with a Latin-1, a BMP and a non-BMP character in
        # place of ':'.
        for x in ("\xe0", "\u0430", "\U0001d49c"):
            xx = x * 2
            xxx = x * 3
            string = "a%sb%sc%sd" % (x, xx, xxx)
            self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
            self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
            self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
                             [(x, ""), (x, x), (x, xx)])
Guido van Rossum49946571997-07-18 04:26:25 +0000383
Skip Montanaro5ba00542003-04-25 16:00:14 +0000384 def test_bug_117612(self):
385 self.assertEqual(re.findall(r"(a|(b))", "aba"),
386 [("a", ""),("b", "b"),("a", "")])
387
    def test_re_match(self):
        # groups()/group() on str and str-subclass subjects.
        for string in 'a', S('a'):
            self.assertEqual(re.match('a', string).groups(), ())
            self.assertEqual(re.match('(a)', string).groups(), ('a',))
            self.assertEqual(re.match('(a)', string).group(0), 'a')
            self.assertEqual(re.match('(a)', string).group(1), 'a')
            self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
        # The same checks on bytes-like subjects.
        for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
            self.assertEqual(re.match(b'a', string).groups(), ())
            self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
            self.assertEqual(re.match(b'(a)', string).group(0), b'a')
            self.assertEqual(re.match(b'(a)', string).group(1), b'a')
            self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
        # The same checks with a Latin-1, a BMP and a non-BMP character.
        for a in ("\xe0", "\u0430", "\U0001d49c"):
            self.assertEqual(re.match(a, a).groups(), ())
            self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
            self.assertEqual(re.match('(%s)' % a, a).group(0), a)
            self.assertEqual(re.match('(%s)' % a, a).group(1), a)
            self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))

        # Groups that did not participate are None, or the default passed
        # to groups().
        pat = re.compile('((a)|(b))(c)?')
        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

        # group() accepts numeric indexes and group names, also mixed.
        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                         (None, 'b', None))
        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000420
    def test_group(self):
        # Helper object that is not an int but converts to one through
        # __index__.
        class Index:
            def __init__(self, value):
                self.value = value
            def __index__(self):
                return self.value
        # A single group
        m = re.match('(a)(b)', 'ab')
        self.assertEqual(m.group(), 'ab')
        self.assertEqual(m.group(0), 'ab')
        self.assertEqual(m.group(1), 'a')
        self.assertEqual(m.group(Index(1)), 'a')
        # Out-of-range indexes (including ones too large for a C integer)
        # and unknown names are all expected to raise IndexError.
        self.assertRaises(IndexError, m.group, -1)
        self.assertRaises(IndexError, m.group, 3)
        self.assertRaises(IndexError, m.group, 1<<1000)
        self.assertRaises(IndexError, m.group, Index(1<<1000))
        self.assertRaises(IndexError, m.group, 'x')
        # Multiple groups
        self.assertEqual(m.group(2, 1), ('b', 'a'))
        self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a'))
441
    def test_match_getitem(self):
        # Match objects support m[key] with a group number or group name.
        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')

        m = pat.match('a')
        self.assertEqual(m['a1'], 'a')
        self.assertEqual(m['b2'], None)
        self.assertEqual(m['c3'], None)
        # format_map() looks names up through m[...].
        self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None')
        self.assertEqual(m[0], 'a')
        self.assertEqual(m[1], 'a')
        self.assertEqual(m[2], None)
        self.assertEqual(m[3], None)
        # Unknown names, out-of-range numbers and tuple keys are all
        # expected to raise IndexError('no such group').
        with self.assertRaisesRegex(IndexError, 'no such group'):
            m['X']
        with self.assertRaisesRegex(IndexError, 'no such group'):
            m[-1]
        with self.assertRaisesRegex(IndexError, 'no such group'):
            m[4]
        with self.assertRaisesRegex(IndexError, 'no such group'):
            m[0, 1]
        with self.assertRaisesRegex(IndexError, 'no such group'):
            m[(0,)]
        with self.assertRaisesRegex(IndexError, 'no such group'):
            m[(0, 1)]
        with self.assertRaisesRegex(IndexError, 'no such group'):
            'a1={a2}'.format_map(m)

        m = pat.match('ac')
        self.assertEqual(m['a1'], 'a')
        self.assertEqual(m['b2'], None)
        self.assertEqual(m['c3'], 'c')
        self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c')
        self.assertEqual(m[0], 'ac')
        self.assertEqual(m[1], 'a')
        self.assertEqual(m[2], None)
        self.assertEqual(m[3], 'c')

        # Cannot assign.
        with self.assertRaises(TypeError):
            m[0] = 1

        # No len().
        self.assertRaises(TypeError, len, m)
485
    def test_re_fullmatch(self):
        # Issue 16203: Proposal: add re.fullmatch() method.
        self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
        # With 'a|ab' the first alternative alone does not cover the whole
        # subject, so the second one must be chosen.
        for string in "ab", S("ab"):
            self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2))
        for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
            self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2))
        for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
            r = r"%s|%s" % (a, a + b)
            self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
        # Non-greedy quantifiers must still extend to the end of the subject.
        self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
        self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
        self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
        self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
        self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
        self.assertIsNone(re.fullmatch(r"a+", "ab"))
        # A trailing newline in the subject is not covered by '$' or '\Z'.
        self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
        self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
        self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
        # Lookahead/lookbehind assertions inside a full match.
        self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
        self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
        self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))

        # With pos/endpos the match must cover exactly [pos, endpos).
        self.assertEqual(
            re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
        self.assertEqual(
            re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
        self.assertEqual(
            re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
515
    def test_re_groupref_exists(self):
        # (?(1)\)) requires the closing paren only if group 1 (the opening
        # paren) matched.
        self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'))
        self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a'))
        # Conditionals with both a yes- and a no-branch, including an empty
        # yes-branch.
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match(r'^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(),
                         ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(),
                         ('a', None, 'd'))
        self.assertIsNone(p.match('abd'))
        self.assertIsNone(p.match('ac'))

        # Support > 100 groups.
        # The subject 'xc8yz' selects alternative 200 (0xc8), so the
        # conditional on group 200 requires the trailing 'z'.
        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
        pat = '(?:%s)(?(200)z)' % pat
        self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

        # Malformed conditionals: message and error position are checked.
        self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
        self.checkPatternError(r'()(?(1)a|b',
                               'missing ), unterminated subpattern', 2)
        self.checkPatternError(r'()(?(1)a|b|c)',
                               'conditional backref with more than '
                               'two branches', 10)
552
def test_re_groupref_overflow(self):
    # A group number equal to MAXGROUPS must be reported as an invalid
    # group reference, both in a replacement template and in a pattern.
    from sre_constants import MAXGROUPS
    self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
                            'invalid group reference %d' % MAXGROUPS, 3)
    self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS,
                           'invalid group reference %d' % MAXGROUPS, 10)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200559
def test_re_groupref(self):
    # Numeric backreference to an optional group.
    self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
                     ('|', 'a'))
    self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
                     (None, 'a'))
    self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
    self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
    # Backreference captured inside another group.
    self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
                     ('a', 'a'))
    self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
                     (None, None))

    # A group cannot reference itself while it is still open.
    self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)
573
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000574 def test_groupdict(self):
575 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
576 'first second').groupdict(),
577 {'first':'first', 'second':'second'})
578
579 def test_expand(self):
580 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
581 "first second")
582 .expand(r"\2 \1 \g<second> \g<first>"),
583 "second first second first")
Serhiy Storchaka7438e4b2014-10-10 11:06:31 +0300584 self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
585 "first")
586 .expand(r"\2 \g<second>"),
587 " ")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000588
def test_repeat_minmax(self):
    # Upper bound too small for the three-character subject.
    self.assertIsNone(re.match(r"^(\w){1}$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1}?$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1,2}$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1,2}?$", "abc"))

    # Greedy bounds that admit three repetitions; the repeated group
    # reports its last iteration.
    self.assertEqual(re.match(r"^(\w){3}$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,3}$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,4}$", "abc").group(1), "c")
    # Previously "{3,4}?", which duplicated the lazy case below and left
    # the greedy {3,4} form untested.
    self.assertEqual(re.match(r"^(\w){3,4}$", "abc").group(1), "c")
    # Lazy counterparts.
    self.assertEqual(re.match(r"^(\w){3}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,3}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,4}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c")

    # Same checks with a single-character literal instead of a group.
    self.assertIsNone(re.match(r"^x{1}$", "xxx"))
    self.assertIsNone(re.match(r"^x{1}?$", "xxx"))
    self.assertIsNone(re.match(r"^x{1,2}$", "xxx"))
    self.assertIsNone(re.match(r"^x{1,2}?$", "xxx"))

    self.assertTrue(re.match(r"^x{3}$", "xxx"))
    self.assertTrue(re.match(r"^x{1,3}$", "xxx"))
    self.assertTrue(re.match(r"^x{3,3}$", "xxx"))
    self.assertTrue(re.match(r"^x{1,4}$", "xxx"))
    # Previously "{3,4}?" (duplicate of the lazy case below).
    self.assertTrue(re.match(r"^x{3,4}$", "xxx"))
    self.assertTrue(re.match(r"^x{3}?$", "xxx"))
    self.assertTrue(re.match(r"^x{1,3}?$", "xxx"))
    self.assertTrue(re.match(r"^x{1,4}?$", "xxx"))
    self.assertTrue(re.match(r"^x{3,4}?$", "xxx"))

    # "{}" is not a repeat qualifier; it matches the literal text "{}".
    self.assertIsNone(re.match(r"^x{}$", "xxx"))
    self.assertTrue(re.match(r"^x{}$", "x{}"))

    self.checkPatternError(r'x{2,1}',
                           'min repeat greater than max repeat', 2)
624
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000625 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000626 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000627 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000628 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
629 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
630 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
631 {'first': 1, 'other': 2})
632
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000633 self.assertEqual(re.match("(a)", "a").pos, 0)
634 self.assertEqual(re.match("(a)", "a").endpos, 1)
635 self.assertEqual(re.match("(a)", "a").string, "a")
636 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300637 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000638
Serhiy Storchaka07360df2015-03-30 01:01:48 +0300639 # Issue 14260. groupindex should be non-modifiable mapping.
640 p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
641 self.assertEqual(sorted(p.groupindex), ['first', 'other'])
642 self.assertEqual(p.groupindex['other'], 2)
643 with self.assertRaises(TypeError):
644 p.groupindex['other'] = 0
645 self.assertEqual(p.groupindex['other'], 2)
646
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000647 def test_special_escapes(self):
648 self.assertEqual(re.search(r"\b(b.)\b",
649 "abcd abc bcd bx").group(1), "bx")
650 self.assertEqual(re.search(r"\B(b.)\B",
651 "abc bcd bc abxd").group(1), "bx")
652 self.assertEqual(re.search(r"\b(b.)\b",
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300653 "abcd abc bcd bx", re.ASCII).group(1), "bx")
654 self.assertEqual(re.search(r"\B(b.)\B",
655 "abc bcd bc abxd", re.ASCII).group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000656 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
657 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300658 self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300659 self.assertEqual(re.search(br"\b(b.)\b",
660 b"abcd abc bcd bx").group(1), b"bx")
661 self.assertEqual(re.search(br"\B(b.)\B",
662 b"abc bcd bc abxd").group(1), b"bx")
663 self.assertEqual(re.search(br"\b(b.)\b",
664 b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
665 self.assertEqual(re.search(br"\B(b.)\B",
666 b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
667 self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
668 self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300669 self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000670 self.assertEqual(re.search(r"\d\D\w\W\s\S",
671 "1aa! a").group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300672 self.assertEqual(re.search(br"\d\D\w\W\s\S",
673 b"1aa! a").group(0), b"1aa! a")
674 self.assertEqual(re.search(r"\d\D\w\W\s\S",
675 "1aa! a", re.ASCII).group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300676 self.assertEqual(re.search(br"\d\D\w\W\s\S",
677 b"1aa! a", re.LOCALE).group(0), b"1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000678
def test_other_escapes(self):
    # A lone trailing backslash is an error.
    self.checkPatternError("\\", 'bad escape (end of pattern)', 0)
    # Escaped punctuation matches itself, outside and inside a set.
    self.assertEqual(re.match(r"\(", '(').group(), '(')
    self.assertIsNone(re.match(r"\(", ')'))
    self.assertEqual(re.match(r"\\", '\\').group(), '\\')
    self.assertEqual(re.match(r"[\]]", ']').group(), ']')
    self.assertIsNone(re.match(r"[\]]", '['))
    # An escaped '-' inside a set is a literal, not a range operator.
    self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
    self.assertIsNone(re.match(r"[a\-c]", 'b'))
    self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
    self.assertIsNone(re.match(r"[\^a]+", 'b'))
    re.purge()  # for warnings
    # These ASCII-letter escapes must be rejected outside a set ...
    for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
        with self.subTest(c):
            self.assertRaises(re.error, re.compile, '\\%c' % c)
    # ... and these inside a set.
    for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
        with self.subTest(c):
            self.assertRaises(re.error, re.compile, '[\\%c]' % c)
Serhiy Storchakab99c1322014-11-10 14:38:16 +0200697
def test_named_unicode_escapes(self):
    # test individual Unicode named escapes
    self.assertTrue(re.match(r'\N{LESS-THAN SIGN}', '<'))
    self.assertTrue(re.match(r'\N{less-than sign}', '<'))
    self.assertIsNone(re.match(r'\N{LESS-THAN SIGN}', '>'))
    self.assertTrue(re.match(r'\N{SNAKE}', '\U0001f40d'))
    self.assertTrue(re.match(r'\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH '
                             r'HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}',
                             '\ufbf9'))
    # Named escapes as the endpoints of a character range.
    self.assertTrue(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]',
                             '='))
    self.assertIsNone(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]',
                               ';'))

    # test errors in \N{name} handling - only valid names should pass
    self.checkPatternError(r'\N', 'missing {', 2)
    self.checkPatternError(r'[\N]', 'missing {', 3)
    self.checkPatternError(r'\N{', 'missing character name', 3)
    self.checkPatternError(r'[\N{', 'missing character name', 4)
    self.checkPatternError(r'\N{}', 'missing character name', 3)
    self.checkPatternError(r'[\N{}]', 'missing character name', 4)
    self.checkPatternError(r'\NSNAKE}', 'missing {', 2)
    self.checkPatternError(r'[\NSNAKE}]', 'missing {', 3)
    self.checkPatternError(r'\N{SNAKE',
                           'missing }, unterminated name', 3)
    self.checkPatternError(r'[\N{SNAKE]',
                           'missing }, unterminated name', 4)
    self.checkPatternError(r'[\N{SNAKE]}',
                           "undefined character name 'SNAKE]'", 1)
    self.checkPatternError(r'\N{SPAM}',
                           "undefined character name 'SPAM'", 0)
    self.checkPatternError(r'[\N{SPAM}]',
                           "undefined character name 'SPAM'", 1)
    # \N{...} is not accepted in bytes patterns.
    self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
    self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
733
Ezio Melotti5a045b92012-02-29 11:48:44 +0200734 def test_string_boundaries(self):
735 # See http://bugs.python.org/issue10713
736 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
737 "abc")
738 # There's a word boundary at the start of a string.
739 self.assertTrue(re.match(r"\b", "abc"))
740 # A non-empty string includes a non-boundary zero-length match.
741 self.assertTrue(re.search(r"\B", "abc"))
742 # There is no non-boundary match at the start of a string.
743 self.assertFalse(re.match(r"\B", "abc"))
744 # However, an empty string contains no word boundaries, and also no
745 # non-boundaries.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300746 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200747 # This one is questionable and different from the perlre behaviour,
748 # but describes current behavior.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300749 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200750 # A single word-character string has two boundaries, but no
751 # non-boundary gaps.
752 self.assertEqual(len(re.findall(r"\b", "a")), 2)
753 self.assertEqual(len(re.findall(r"\B", "a")), 0)
754 # If there are no words, there are no boundaries
755 self.assertEqual(len(re.findall(r"\b", " ")), 0)
756 self.assertEqual(len(re.findall(r"\b", " ")), 0)
757 # Can match around the whitespace.
758 self.assertEqual(len(re.findall(r"\B", " ")), 2)
759
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000760 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000761 self.assertEqual(re.match("([\u2222\u2223])",
762 "\u2222").group(1), "\u2222")
Serhiy Storchakabe80fc92013-10-24 22:02:58 +0300763 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300764 self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000765
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100766 def test_big_codesize(self):
767 # Issue #1160
768 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300769 self.assertTrue(r.match('1000'))
770 self.assertTrue(r.match('9999'))
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100771
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000772 def test_anyall(self):
773 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
774 "a\nb")
775 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
776 "a\n\nb")
777
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200778 def test_lookahead(self):
R David Murray44b548d2016-09-08 13:59:53 -0400779 self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a")
780 self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a")
781 self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a")
782 self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000783 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
784 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
785 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
786
787 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
788 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
789 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
790 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
791
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200792 # Group reference.
793 self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
794 self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
795 # Conditional group reference.
796 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
797 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
798 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
799 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
800 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
801 # Group used before defined.
802 self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
803 self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
804 self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
805
806 def test_lookbehind(self):
807 self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
808 self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
809 self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
810 self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
811 # Group reference.
812 self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
813 self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
814 self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
815 self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
816 # Conditional group reference.
817 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
818 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
819 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
820 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
821 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
822 # Group used before defined.
823 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
824 self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
825 self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
826 # Group defined in the same lookbehind pattern
827 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
828 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
829 self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
830 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
831
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000832 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000833 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300834 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000835 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
836 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
837 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
838 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
839 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
840 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
841 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
842 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
843
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200844 assert '\u212a'.lower() == 'k' # 'K'
845 self.assertTrue(re.match(r'K', '\u212a', re.I))
846 self.assertTrue(re.match(r'k', '\u212a', re.I))
847 self.assertTrue(re.match(r'\u212a', 'K', re.I))
848 self.assertTrue(re.match(r'\u212a', 'k', re.I))
849 assert '\u017f'.upper() == 'S' # 'ſ'
850 self.assertTrue(re.match(r'S', '\u017f', re.I))
851 self.assertTrue(re.match(r's', '\u017f', re.I))
852 self.assertTrue(re.match(r'\u017f', 'S', re.I))
853 self.assertTrue(re.match(r'\u017f', 's', re.I))
854 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
855 self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
856 self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
857
858 def test_ignore_case_set(self):
859 self.assertTrue(re.match(r'[19A]', 'A', re.I))
860 self.assertTrue(re.match(r'[19a]', 'a', re.I))
861 self.assertTrue(re.match(r'[19a]', 'A', re.I))
862 self.assertTrue(re.match(r'[19A]', 'a', re.I))
863 self.assertTrue(re.match(br'[19A]', b'A', re.I))
864 self.assertTrue(re.match(br'[19a]', b'a', re.I))
865 self.assertTrue(re.match(br'[19a]', b'A', re.I))
866 self.assertTrue(re.match(br'[19A]', b'a', re.I))
867 assert '\u212a'.lower() == 'k' # 'K'
868 self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
869 self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
870 self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
871 self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
872 assert '\u017f'.upper() == 'S' # 'ſ'
873 self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
874 self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
875 self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
876 self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
877 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
878 self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
879 self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
880
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200881 def test_ignore_case_range(self):
882 # Issues #3511, #17381.
883 self.assertTrue(re.match(r'[9-a]', '_', re.I))
884 self.assertIsNone(re.match(r'[9-A]', '_', re.I))
885 self.assertTrue(re.match(br'[9-a]', b'_', re.I))
886 self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
887 self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
888 self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
889 self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))
890 self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
891 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
892 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
893 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
894 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
895 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
896 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
897 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
898 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
899
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200900 assert '\u212a'.lower() == 'k' # 'K'
901 self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
902 self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
903 self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
904 self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
905 assert '\u017f'.upper() == 'S' # 'ſ'
906 self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
907 self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
908 self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
909 self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
910 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
911 self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
912 self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
913
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000914 def test_category(self):
915 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
916
@cpython_only
def test_case_helpers(self):
    # Check the _sre case helpers against str.lower()/str.upper().
    import _sre
    # ASCII range: the ascii_* and unicode_* helpers must agree.
    for i in range(128):
        c = chr(i)
        lo = ord(c.lower())
        self.assertEqual(_sre.ascii_tolower(i), lo)
        self.assertEqual(_sre.unicode_tolower(i), lo)
        iscased = c in string.ascii_letters
        self.assertEqual(_sre.ascii_iscased(i), iscased)
        self.assertEqual(_sre.unicode_iscased(i), iscased)

    # Non-ASCII: the ascii_* helpers leave the code point alone.
    for i in list(range(128, 0x1000)) + [0x10400, 0x10428]:
        c = chr(i)
        self.assertEqual(_sre.ascii_tolower(i), i)
        # U+0130 is checked separately below.
        if i != 0x0130:
            self.assertEqual(_sre.unicode_tolower(i), ord(c.lower()))
        iscased = c != c.lower() or c != c.upper()
        self.assertFalse(_sre.ascii_iscased(i))
        # Use the value computed above instead of repeating the
        # expression.
        self.assertEqual(_sre.unicode_iscased(i), iscased)

    self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130)
    self.assertEqual(_sre.unicode_tolower(0x0130), ord('i'))
    self.assertFalse(_sre.ascii_iscased(0x0130))
    self.assertTrue(_sre.unicode_iscased(0x0130))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000943
944 def test_not_literal(self):
R David Murray44b548d2016-09-08 13:59:53 -0400945 self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
946 self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000947
Serhiy Storchaka05cb7282017-11-16 12:38:26 +0200948 def test_possible_set_operations(self):
949 s = bytes(range(128)).decode()
950 with self.assertWarns(FutureWarning):
951 p = re.compile(r'[0-9--1]')
952 self.assertEqual(p.findall(s), list('-./0123456789'))
953 self.assertEqual(re.findall(r'[--1]', s), list('-./01'))
954 with self.assertWarns(FutureWarning):
955 p = re.compile(r'[%--1]')
956 self.assertEqual(p.findall(s), list("%&'()*+,-1"))
957 with self.assertWarns(FutureWarning):
958 p = re.compile(r'[%--]')
959 self.assertEqual(p.findall(s), list("%&'()*+,-"))
960
961 with self.assertWarns(FutureWarning):
962 p = re.compile(r'[0-9&&1]')
963 self.assertEqual(p.findall(s), list('&0123456789'))
964 with self.assertWarns(FutureWarning):
965 p = re.compile(r'[\d&&1]')
966 self.assertEqual(p.findall(s), list('&0123456789'))
967 self.assertEqual(re.findall(r'[&&1]', s), list('&1'))
968
969 with self.assertWarns(FutureWarning):
970 p = re.compile(r'[0-9||a]')
971 self.assertEqual(p.findall(s), list('0123456789a|'))
972 with self.assertWarns(FutureWarning):
973 p = re.compile(r'[\d||a]')
974 self.assertEqual(p.findall(s), list('0123456789a|'))
975 self.assertEqual(re.findall(r'[||1]', s), list('1|'))
976
977 with self.assertWarns(FutureWarning):
978 p = re.compile(r'[0-9~~1]')
979 self.assertEqual(p.findall(s), list('0123456789~'))
980 with self.assertWarns(FutureWarning):
981 p = re.compile(r'[\d~~1]')
982 self.assertEqual(p.findall(s), list('0123456789~'))
983 self.assertEqual(re.findall(r'[~~1]', s), list('1~'))
984
985 with self.assertWarns(FutureWarning):
986 p = re.compile(r'[[0-9]|]')
987 self.assertEqual(p.findall(s), list('0123456789[]'))
988
989 with self.assertWarns(FutureWarning):
990 p = re.compile(r'[[:digit:]|]')
991 self.assertEqual(p.findall(s), list(':[]dgit'))
992
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000993 def test_search_coverage(self):
R David Murray44b548d2016-09-08 13:59:53 -0400994 self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
995 self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000996
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.fullmatch):
    # Assert that matcher(pattern, text) succeeds and that the match has
    # the expected text and span.
    #
    # With neither `match` nor `span` given, the pattern is expected to
    # match the whole of `text`.  Supplying only one of the two raises
    # ValueError.
    if match is None and span is None:
        # the pattern matches the whole text
        match = text
        span = (0, len(text))
    elif match is None or span is None:
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    m = matcher(pattern, text)
    self.assertTrue(m)
    self.assertEqual(m.group(), match)
    self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +00001010
# ASCII characters that the escape tests expect re.escape() to return
# unchanged.
LITERAL_CHARS = string.ascii_letters + string.digits + '!"%\',/:;<=>@_`'
Serhiy Storchaka59083002017-04-13 21:06:43 +03001012
def test_re_escape(self):
    # Every character in range(256) must match itself once escaped:
    # bare, inside a set, and in verbose mode.
    p = ''.join(chr(i) for i in range(256))
    for c in p:
        self.assertMatch(re.escape(c), c)
        self.assertMatch('[' + re.escape(c) + ']', c)
        self.assertMatch('(?x)' + re.escape(c), c)
    self.assertMatch(re.escape(p), p)
    # These characters must come back backslash-escaped.
    for c in '-.]{}':
        self.assertEqual(re.escape(c)[:1], '\\')
    # ... and these must come back unchanged.
    literal_chars = self.LITERAL_CHARS
    self.assertEqual(re.escape(literal_chars), literal_chars)
Guido van Rossum49946571997-07-18 04:26:25 +00001024
def test_re_escape_bytes(self):
    # Bytes counterpart of test_re_escape: every byte value must match
    # itself once escaped, bare, inside a set, and in verbose mode.
    p = bytes(range(256))
    for i in p:
        b = bytes([i])
        self.assertMatch(re.escape(b), b)
        self.assertMatch(b'[' + re.escape(b) + b']', b)
        self.assertMatch(b'(?x)' + re.escape(b), b)
    self.assertMatch(re.escape(p), p)
    # These bytes must come back backslash-escaped.
    for i in b'-.]{}':
        b = bytes([i])
        self.assertEqual(re.escape(b)[:1], b'\\')
    # ... and these must come back unchanged.
    literal_chars = self.LITERAL_CHARS.encode('ascii')
    self.assertEqual(re.escape(literal_chars), literal_chars)
Guido van Rossum698280d2008-09-10 17:44:35 +00001038
def test_re_escape_non_ascii(self):
    # Non-ASCII characters are returned unchanged by re.escape().
    s = 'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, s)
    self.assertMatch(s_escaped, s)
    # The escaped character still works as a repeatable atom.
    self.assertMatch('.%s+.' % re.escape('\u2620'), s,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)
1046
def test_re_escape_non_ascii_bytes(self):
    # UTF-8 encoded non-ASCII bytes are returned unchanged by re.escape().
    b = 'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b)
    self.assertMatch(b_escaped, b)
    # The encoded character occurs twice in b.
    res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
    self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +00001054
Serhiy Storchakab85a9762014-09-15 11:33:19 +03001055 def test_pickling(self):
1056 import pickle
1057 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
1058 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
1059 pickled = pickle.dumps(oldpat, proto)
1060 newpat = pickle.loads(pickled)
1061 self.assertEqual(newpat, oldpat)
1062 # current pickle expects the _compile() reconstructor in re module
1063 from re import _compile
Guido van Rossum23b22571997-07-17 22:36:14 +00001064
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001065 def test_copying(self):
1066 import copy
1067 p = re.compile(r'(?P<int>\d+)(?:\.(?P<frac>\d*))?')
1068 self.assertIs(copy.copy(p), p)
1069 self.assertIs(copy.deepcopy(p), p)
1070 m = p.match('12.34')
1071 self.assertIs(copy.copy(m), m)
1072 self.assertIs(copy.deepcopy(m), m)
1073
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001074 def test_constants(self):
1075 self.assertEqual(re.I, re.IGNORECASE)
1076 self.assertEqual(re.L, re.LOCALE)
1077 self.assertEqual(re.M, re.MULTILINE)
1078 self.assertEqual(re.S, re.DOTALL)
1079 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001080
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001081 def test_flags(self):
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001082 for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001083 self.assertTrue(re.compile('^pattern$', flag))
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001084 for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
1085 self.assertTrue(re.compile(b'^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +00001086
def test_sre_character_literals(self):
    # Octal, \x, \u and \U escapes outside a set.  Each escape is also
    # checked with a trailing character that must not be absorbed into
    # it.
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        if i < 256:
            # Three-digit octal and two-digit hex cover 0..255 only.
            self.assertTrue(re.match(r"\%03o" % i, chr(i)))
            self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
            self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
            self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
        if i < 0x10000:
            # \uXXXX covers the BMP only.
            self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
            self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
        self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
        self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
        self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
    # Escapes starting with 0 are octal escapes, not group references.
    self.assertTrue(re.match(r"\0", "\000"))
    self.assertTrue(re.match(r"\08", "\0008"))
    self.assertTrue(re.match(r"\01", "\001"))
    self.assertTrue(re.match(r"\018", "\0018"))
    # Out-of-range and truncated escapes are compile-time errors.
    self.checkPatternError(r"\567",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 0)
    self.checkPatternError(r"\911", 'invalid group reference 91', 1)
    self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
    self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
    self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
    self.checkPatternError(r"\u123z", r'incomplete escape \u123', 0)
    self.checkPatternError(r"\U0001234", r'incomplete escape \U0001234', 0)
    self.checkPatternError(r"\U0001234z", r'incomplete escape \U0001234', 0)
    self.checkPatternError(r"\U00110000", r'bad escape \U00110000', 0)
Skip Montanaro7d9963f2003-04-25 14:12:40 +00001118
def test_sre_character_class_literals(self):
    # Same numeric escapes as for plain literals, but inside [...].
    code_points = [0, 8, 16, 32, 64, 127, 128, 255, 256,
                   0xFFFF, 0x10000, 0x10FFFF]
    narrow_formats = (r"[\%o]", r"[\%o8]", r"[\%03o]", r"[\%03o0]",
                      r"[\%03o8]", r"[\x%02x]", r"[\x%02x0]", r"[\x%02xz]")
    bmp_formats = (r"[\u%04x]", r"[\u%04x0]", r"[\u%04xz]")
    for cp in code_points:
        ch = chr(cp)
        checks = []  # (pattern, subject) pairs, in execution order
        if cp < 256:
            checks.extend((fmt % cp, ch) for fmt in narrow_formats)
        if cp < 0x10000:
            checks.extend((fmt % cp, ch) for fmt in bmp_formats)
        checks.append((r"[\U%08x]" % cp, ch))
        checks.append((r"[\U%08x0]" % cp, ch + "0"))
        checks.append((r"[\U%08xz]" % cp, ch + "z"))
        for pattern, subject in checks:
            self.assertTrue(re.match(pattern, subject))

    # Malformed or out-of-range escapes: (pattern, message, position).
    rejected = [
        (r"[\567]", r'octal escape value \567 outside of range 0-0o377', 1),
        (r"[\911]", r'bad escape \9', 1),
        (r"[\x1z]", r'incomplete escape \x1', 1),
        (r"[\u123z]", r'incomplete escape \u123', 1),
        (r"[\U0001234z]", r'incomplete escape \U0001234', 1),
        (r"[\U00110000]", r'bad escape \U00110000', 1),
    ]
    for pattern, errmsg, pos in rejected:
        self.checkPatternError(pattern, errmsg, pos)

    # A range whose endpoints are both written as \U escapes.
    astral_range = r"[\U0001d49c-\U0001d4b5]"
    self.assertTrue(re.match(astral_range, "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +02001146
def test_sre_byte_literals(self):
    # Octal and \x escapes in bytes patterns, alone and followed by a
    # byte that must not be absorbed into the escape.
    for value in [0, 8, 16, 32, 64, 127, 128, 255]:
        raw = bytes([value])
        escapes = (
            (r"\%03o" % value, ("", "0", "8")),
            (r"\x%02x" % value, ("", "0", "z")),
        )
        for escape, tails in escapes:
            for tail in tails:
                pattern = (escape + tail).encode()
                self.assertTrue(re.match(pattern, raw + tail.encode()))

    # \u and \U escapes are rejected in bytes patterns.
    for unicode_escape in (br"\u1234", br"\U00012345"):
        with self.assertRaises(re.error):
            re.compile(unicode_escape)

    # \0 is always an octal escape, even when a non-octal digit follows.
    nul_cases = [
        (br"\0", b"\000"),
        (br"\08", b"\0008"),
        (br"\01", b"\001"),
        (br"\018", b"\0018"),
    ]
    for pattern, subject in nul_cases:
        self.assertTrue(re.match(pattern, subject))

    # Malformed or out-of-range escapes: (pattern, message, position).
    rejected = [
        (br"\567", r'octal escape value \567 outside of range 0-0o377', 0),
        (br"\911", 'invalid group reference 91', 1),
        (br"\x1", r'incomplete escape \x1', 0),
        (br"\x1z", r'incomplete escape \x1', 0),
    ]
    for pattern, errmsg, pos in rejected:
        self.checkPatternError(pattern, errmsg, pos)
Antoine Pitrou463badf2012-06-23 13:29:19 +02001167
def test_sre_byte_class_literals(self):
    # Octal and \x escapes inside a bytes character class.
    class_formats = (r"[\%o]", r"[\%o8]", r"[\%03o]", r"[\%03o0]",
                     r"[\%03o8]", r"[\x%02x]", r"[\x%02x0]", r"[\x%02xz]")
    for value in [0, 8, 16, 32, 64, 127, 128, 255]:
        raw = bytes([value])
        for fmt in class_formats:
            pattern = (fmt % value).encode()
            self.assertTrue(re.match(pattern, raw))

    # \u and \U escapes are rejected in bytes patterns.
    for unicode_escape in (br"[\u1234]", br"[\U00012345]"):
        with self.assertRaises(re.error):
            re.compile(unicode_escape)

    # Malformed or out-of-range escapes: (pattern, message, position).
    rejected = [
        (br"[\567]", r'octal escape value \567 outside of range 0-0o377', 1),
        (br"[\911]", r'bad escape \9', 1),
        (br"[\x1z]", r'incomplete escape \x1', 1),
    ]
    for pattern, errmsg, pos in rejected:
        self.checkPatternError(pattern, errmsg, pos)
1185
def test_character_set_errors(self):
    # (pattern, expected message, expected error position)
    malformed_sets = [
        (r'[', 'unterminated character set', 0),
        (r'[^', 'unterminated character set', 0),
        (r'[a', 'unterminated character set', 0),
        # bug 545855 -- this pattern used to provoke a TypeError instead
        # of the compile error it should cause.
        (r"[a-", 'unterminated character set', 0),
        (r"[\w-b]", r'bad character range \w-b', 1),
        (r"[a-\w]", r'bad character range a-\w', 1),
        (r"[b-a]", 'bad character range b-a', 1),
    ]
    for pattern, errmsg, pos in malformed_sets:
        self.checkPatternError(pattern, errmsg, pos)
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +00001196
Skip Montanaro7d9963f2003-04-25 14:12:40 +00001197 def test_bug_113254(self):
1198 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
1199 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
1200 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
1201
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001202 def test_bug_527371(self):
1203 # bug described in patches 527371/672491
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001204 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001205 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
1206 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
R David Murray44b548d2016-09-08 13:59:53 -04001207 self.assertEqual(re.match(r"(?P<a>a(b))", "ab").lastgroup, 'a')
1208 self.assertEqual(re.match(r"((a))", "a").lastindex, 1)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001209
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001210 def test_bug_418626(self):
1211 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
1212 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
1213 # pattern '*?' on a long string.
1214 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
1215 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
1216 20003)
1217 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001218 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +00001219 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001220 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001221
1222 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001223 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001224 self.assertEqual(re.compile(pat) and 1, 1)
1225
Skip Montanaro1e703c62003-04-25 15:40:28 +00001226 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001227 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +00001228 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001229 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
1230 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
1231 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +00001232
def test_nothing_to_repeat(self):
    # Every quantifier, greedy and lazy, with nothing in front of it.
    quantifiers = [reps + mod
                   for reps in ('*', '+', '?', '{1,2}')
                   for mod in ('', '?')]
    for quantifier in quantifiers:
        # At the very start of the pattern ...
        self.checkPatternError(quantifier, 'nothing to repeat', 0)
        # ... and at the start of a non-capturing group.
        self.checkPatternError('(?:' + quantifier + ')',
                               'nothing to repeat', 3)
1240
def test_multiple_repeat(self):
    # A quantifier applied directly to another quantifier is an error.
    modifiers = ('', '?')
    outer_ops = [reps + mod
                 for reps in ('*', '+', '{1,2}')
                 for mod in modifiers]
    inner_ops = [reps + mod
                 for reps in ('*', '+', '?', '{1,2}')
                 for mod in modifiers]
    for outer_op in outer_ops:
        for inner_op in inner_ops:
            # The expected position is just past 'x' and the inner operator.
            error_pos = 1 + len(inner_op)
            self.checkPatternError(r'x%s%s' % (inner_op, outer_op),
                                   'multiple repeat', error_pos)
1250
Serhiy Storchakafa468162013-02-16 21:23:53 +02001251 def test_unlimited_zero_width_repeat(self):
1252 # Issue #9669
1253 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
1254 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
1255 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
1256 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
1257 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
1258 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
1259
Skip Montanaro1e703c62003-04-25 15:40:28 +00001260 def test_scanner(self):
1261 def s_ident(scanner, token): return token
1262 def s_operator(scanner, token): return "op%s" % token
1263 def s_float(scanner, token): return float(token)
1264 def s_int(scanner, token): return int(token)
1265
1266 scanner = Scanner([
1267 (r"[a-zA-Z_]\w*", s_ident),
1268 (r"\d+\.\d*", s_float),
1269 (r"\d+", s_int),
1270 (r"=|\+|-|\*|/", s_operator),
1271 (r"\s+", None),
1272 ])
1273
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001274 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +00001275
Skip Montanaro1e703c62003-04-25 15:40:28 +00001276 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
1277 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
1278 'op+', 'bar'], ''))
1279
Skip Montanaro5ba00542003-04-25 16:00:14 +00001280 def test_bug_448951(self):
1281 # bug 448951 (similar to 429357, but with single char match)
1282 # (Also test greedy matches.)
1283 for op in '','?','*':
1284 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
1285 (None, None))
1286 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
1287 ('a:', 'a'))
1288
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001289 def test_bug_725106(self):
1290 # capturing groups in alternatives in repeats
1291 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
1292 ('b', 'a'))
1293 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
1294 ('c', 'b'))
1295 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
1296 ('b', None))
1297 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
1298 ('b', None))
1299 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
1300 ('b', 'a'))
1301 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
1302 ('c', 'b'))
1303 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
1304 ('b', None))
1305 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
1306 ('b', None))
1307
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +00001308 def test_bug_725149(self):
1309 # mark_stack_base restoring before restoring marks
1310 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
1311 ('a', None))
1312 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
1313 ('a', None, None))
1314
Just van Rossum12723ba2003-07-02 20:03:04 +00001315 def test_bug_764548(self):
1316 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001317 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +00001318 pat = re.compile(my_unicode("abc"))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001319 self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +00001320
Skip Montanaro5ba00542003-04-25 16:00:14 +00001321 def test_finditer(self):
1322 iter = re.finditer(r":+", "a:b::c:::d")
1323 self.assertEqual([item.group(0) for item in iter],
1324 [":", "::", ":::"])
1325
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001326 pat = re.compile(r":+")
1327 iter = pat.finditer("a:b::c:::d", 1, 10)
1328 self.assertEqual([item.group(0) for item in iter],
1329 [":", "::", ":::"])
1330
1331 pat = re.compile(r":+")
1332 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
1333 self.assertEqual([item.group(0) for item in iter],
1334 [":", "::", ":::"])
1335
1336 pat = re.compile(r":+")
1337 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
1338 self.assertEqual([item.group(0) for item in iter],
1339 [":", "::", ":::"])
1340
1341 pat = re.compile(r":+")
1342 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
1343 self.assertEqual([item.group(0) for item in iter],
1344 ["::", "::"])
1345
Thomas Wouters40a088d2008-03-18 20:19:54 +00001346 def test_bug_926075(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001347 self.assertIsNot(re.compile('bug_926075'),
1348 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +00001349
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001350 def test_bug_931848(self):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001351 pattern = "[\u002E\u3002\uFF0E\uFF61]"
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001352 self.assertEqual(re.compile(pattern).split("a.b.c"),
1353 ['a','b','c'])
1354
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001355 def test_bug_581080(self):
1356 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +00001357 self.assertEqual(next(iter).span(), (1,2))
1358 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001359
1360 scanner = re.compile(r"\s").scanner("a b")
1361 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001362 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001363
1364 def test_bug_817234(self):
1365 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +00001366 self.assertEqual(next(iter).span(), (0, 4))
1367 self.assertEqual(next(iter).span(), (4, 4))
1368 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001369
Mark Dickinson1f268282009-07-28 17:22:36 +00001370 def test_bug_6561(self):
1371 # '\d' should match characters in Unicode category 'Nd'
1372 # (Number, Decimal Digit), but not those in 'Nl' (Number,
1373 # Letter) or 'No' (Number, Other).
1374 decimal_digits = [
1375 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
1376 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
1377 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
1378 ]
1379 for x in decimal_digits:
R David Murray44b548d2016-09-08 13:59:53 -04001380 self.assertEqual(re.match(r'^\d$', x).group(0), x)
Mark Dickinson1f268282009-07-28 17:22:36 +00001381
1382 not_decimal_digits = [
1383 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
1384 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
1385 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
1386 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
1387 ]
1388 for x in not_decimal_digits:
R David Murray44b548d2016-09-08 13:59:53 -04001389 self.assertIsNone(re.match(r'^\d$', x))
Mark Dickinson1f268282009-07-28 17:22:36 +00001390
Guido van Rossumd8faa362007-04-27 19:54:29 +00001391 def test_empty_array(self):
1392 # SF buf 1647541
1393 import array
Guido van Rossum166746c2007-07-03 15:39:16 +00001394 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +00001395 a = array.array(typecode)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001396 self.assertIsNone(re.compile(b"bla").match(a))
Antoine Pitroufd036452008-08-19 17:56:33 +00001397 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001398
def test_inline_flags(self):
    # Bug #1700
    # Case-insensitive matching of a non-ASCII letter pair must work no
    # matter how the IGNORECASE/DOTALL flags are supplied: as arguments,
    # as inline flags, or as a mixture of both.
    upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
    lower_char = '\u1ea1' # Latin Small Letter A with Dot Below

    # Both flags passed as arguments.
    p = re.compile('.' + upper_char, re.I | re.S)
    q = p.match('\n' + lower_char)
    self.assertTrue(q)

    p = re.compile('.' + lower_char, re.I | re.S)
    q = p.match('\n' + upper_char)
    self.assertTrue(q)

    # IGNORECASE inline, DOTALL as an argument.
    p = re.compile('(?i).' + upper_char, re.S)
    q = p.match('\n' + lower_char)
    self.assertTrue(q)

    p = re.compile('(?i).' + lower_char, re.S)
    q = p.match('\n' + upper_char)
    self.assertTrue(q)

    # Both flags in a single inline group.
    p = re.compile('(?is).' + upper_char)
    q = p.match('\n' + lower_char)
    self.assertTrue(q)

    p = re.compile('(?is).' + lower_char)
    q = p.match('\n' + upper_char)
    self.assertTrue(q)

    # Both flags in separate, consecutive inline groups.
    p = re.compile('(?s)(?i).' + upper_char)
    q = p.match('\n' + lower_char)
    self.assertTrue(q)

    p = re.compile('(?s)(?i).' + lower_char)
    q = p.match('\n' + upper_char)
    self.assertTrue(q)

    # Combinations with VERBOSE, where whitespace surrounds the inline
    # flag groups.
    self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
    self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
    self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
    self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
    self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))

    # A global inline flag after other pattern text still takes effect
    # but must emit a DeprecationWarning that quotes the pattern and is
    # attributed to this file.
    p = upper_char + '(?i)'
    with self.assertWarns(DeprecationWarning) as warns:
        self.assertTrue(re.match(p, lower_char))
    self.assertEqual(
        str(warns.warnings[0].message),
        'Flags not at the start of the expression %r' % p
    )
    self.assertEqual(warns.warnings[0].filename, __file__)

    # For a long pattern the message quotes only the first 20 characters
    # and says so.
    p = upper_char + '(?i)%s' % ('.?' * 100)
    with self.assertWarns(DeprecationWarning) as warns:
        self.assertTrue(re.match(p, lower_char))
    self.assertEqual(
        str(warns.warnings[0].message),
        'Flags not at the start of the expression %r (truncated)' % p[:20]
    )
    self.assertEqual(warns.warnings[0].filename, __file__)

    # bpo-30605: Compiling a bytes instance regex was throwing a BytesWarning
    with warnings.catch_warnings():
        # Turn BytesWarning into an error so a regression surfaces as an
        # exception while the deprecation message is built.
        warnings.simplefilter('error', BytesWarning)
        p = b'A(?i)'
        with self.assertWarns(DeprecationWarning) as warns:
            self.assertTrue(re.match(p, b'a'))
        self.assertEqual(
            str(warns.warnings[0].message),
            'Flags not at the start of the expression %r' % p
        )
        self.assertEqual(warns.warnings[0].filename, __file__)

    # Other placements that are not at the start of the expression:
    # after another element, before a trailing flag group, after leading
    # whitespace without re.X, after an anchor, inside an alternative.
    with self.assertWarns(DeprecationWarning):
        self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
    with self.assertWarns(DeprecationWarning):
        self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
    with self.assertWarns(DeprecationWarning):
        self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
    with self.assertWarns(DeprecationWarning):
        self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
    with self.assertWarns(DeprecationWarning):
        self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
    # Inside a non-capturing group.
    with self.assertWarns(DeprecationWarning) as warns:
        self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
    self.assertRegex(str(warns.warnings[0].message),
                     'Flags not at the start')
    self.assertEqual(warns.warnings[0].filename, __file__)
    # Inside the "yes" branch of a conditional group.
    with self.assertWarns(DeprecationWarning) as warns:
        self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
                                     lower_char))
    self.assertRegex(str(warns.warnings[0].message),
                     'Flags not at the start')
    self.assertEqual(warns.warnings[0].filename, __file__)
    # Inside the "no" branch of a conditional group.
    with self.assertWarns(DeprecationWarning) as warns:
        self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
                                     lower_char))
    self.assertRegex(str(warns.warnings[0].message),
                     'Flags not at the start')
    self.assertEqual(warns.warnings[0].filename, __file__)
Serhiy Storchaka305ccbe2017-05-10 06:05:20 +03001499
1500
Christian Heimes25bb7832008-01-11 16:17:00 +00001501 def test_dollar_matches_twice(self):
1502 "$ matches the end of string, and just before the terminating \n"
1503 pattern = re.compile('$')
1504 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
1505 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
1506 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1507
1508 pattern = re.compile('$', re.MULTILINE)
1509 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
1510 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
1511 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1512
Antoine Pitroufd036452008-08-19 17:56:33 +00001513 def test_bytes_str_mixing(self):
1514 # Mixing str and bytes is disallowed
1515 pat = re.compile('.')
1516 bpat = re.compile(b'.')
1517 self.assertRaises(TypeError, pat.match, b'b')
1518 self.assertRaises(TypeError, bpat.match, 'b')
1519 self.assertRaises(TypeError, pat.sub, b'b', 'c')
1520 self.assertRaises(TypeError, pat.sub, 'b', b'c')
1521 self.assertRaises(TypeError, pat.sub, b'b', b'c')
1522 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
1523 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
1524 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
1525
1526 def test_ascii_and_unicode_flag(self):
1527 # String patterns
1528 for flags in (0, re.UNICODE):
1529 pat = re.compile('\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001530 self.assertTrue(pat.match('\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001531 pat = re.compile(r'\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001532 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001533 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001534 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001535 pat = re.compile('(?a)\xc0', re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001536 self.assertIsNone(pat.match('\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001537 pat = re.compile(r'\w', re.ASCII)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001538 self.assertIsNone(pat.match('\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001539 pat = re.compile(r'(?a)\w')
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001540 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001541 # Bytes patterns
1542 for flags in (0, re.ASCII):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001543 pat = re.compile(b'\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001544 self.assertIsNone(pat.match(b'\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001545 pat = re.compile(br'\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001546 self.assertIsNone(pat.match(b'\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001547 # Incompatibilities
R David Murray44b548d2016-09-08 13:59:53 -04001548 self.assertRaises(ValueError, re.compile, br'\w', re.UNICODE)
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001549 self.assertRaises(re.error, re.compile, br'(?u)\w')
R David Murray44b548d2016-09-08 13:59:53 -04001550 self.assertRaises(ValueError, re.compile, r'\w', re.UNICODE | re.ASCII)
1551 self.assertRaises(ValueError, re.compile, r'(?u)\w', re.ASCII)
1552 self.assertRaises(ValueError, re.compile, r'(?a)\w', re.UNICODE)
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001553 self.assertRaises(re.error, re.compile, r'(?au)\w')
Antoine Pitroufd036452008-08-19 17:56:33 +00001554
def test_locale_flag(self):
    # Check re.LOCALE with bytes patterns, using a non-ASCII letter taken
    # from the preferred locale encoding when one can be found.
    enc = locale.getpreferredencoding()
    # Search non-ASCII letter
    # A usable byte decodes to a character whose lowercase form differs
    # from it and encodes back to exactly one byte that round-trips.
    for i in range(128, 256):
        try:
            c = bytes([i]).decode(enc)
            sletter = c.lower()
            if sletter == c: continue
            bletter = sletter.encode(enc)
            if len(bletter) != 1: continue
            if bletter.decode(enc) != sletter: continue
            bpat = re.escape(bytes([i]))
            break
        except (UnicodeError, TypeError):
            # This byte cannot be used with the encoding; try the next.
            pass
    else:
        # No suitable letter was found: compile an ASCII pattern and
        # skip the letter-specific assertions below (bletter is None).
        bletter = None
        bpat = b'A'
    # Bytes patterns
    # With LOCALE (as an argument or inline) plus IGNORECASE, the
    # pattern byte matches the lowercase byte.
    pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
    if bletter:
        self.assertTrue(pat.match(bletter))
    pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
    if bletter:
        self.assertTrue(pat.match(bletter))
    # Without LOCALE, IGNORECASE alone does not match it.
    pat = re.compile(bpat, re.IGNORECASE)
    if bletter:
        self.assertIsNone(pat.match(bletter))
    # \w matches the byte only when LOCALE is in effect.
    pat = re.compile(br'\w', re.LOCALE)
    if bletter:
        self.assertTrue(pat.match(bletter))
    pat = re.compile(br'(?L)\w')
    if bletter:
        self.assertTrue(pat.match(bletter))
    pat = re.compile(br'\w')
    if bletter:
        self.assertIsNone(pat.match(bletter))
    # Incompatibilities
    # LOCALE is rejected for str patterns and in combination with ASCII.
    self.assertRaises(ValueError, re.compile, '', re.LOCALE)
    self.assertRaises(re.error, re.compile, '(?L)')
    self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
    self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
    self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
    self.assertRaises(re.error, re.compile, b'(?aL)')
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001599
def test_scoped_flags(self):
    def expect(pattern, text, should_match, flags=0):
        # Assert that re.match() succeeds, or returns None, as requested.
        found = re.match(pattern, text, flags)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)

    # IGNORECASE switched on or off for one group only.
    expect(r'(?i:a)b', 'Ab', True)
    expect(r'(?i:a)b', 'aB', False)
    expect(r'(?-i:a)b', 'Ab', False, re.IGNORECASE)
    expect(r'(?-i:a)b', 'aB', True, re.IGNORECASE)
    expect(r'(?i:(?-i:a)b)', 'Ab', False)
    expect(r'(?i:(?-i:a)b)', 'aB', True)

    # VERBOSE switched on or off for one group only.
    expect(r'(?x: a) b', 'a b', True)
    expect(r'(?x: a) b', ' a b', False)
    expect(r'(?-x: a) b', ' ab', True, re.VERBOSE)
    expect(r'(?-x: a) b', 'ab', False, re.VERBOSE)

    # ASCII and UNICODE scoped to a group, including nesting.
    expect(r'\w(?a:\W)\w', '\xe0\xe0\xe0', True)
    expect(r'(?a:\W(?u:\w)\W)', '\xe0\xe0\xe0', True)
    expect(r'\W(?u:\w)\W', '\xe0\xe0\xe0', True, re.ASCII)

    # Invalid flag groups: (pattern, message, position).
    rejected = [
        (r'(?a)(?-a:\w)',
         "bad inline flags: cannot turn off flags 'a', 'u' and 'L'", 8),
        (r'(?i-i:a)',
         'bad inline flags: flag turned on and off', 5),
        (r'(?au:a)',
         "bad inline flags: flags 'a', 'u' and 'L' are incompatible", 4),
        (br'(?aL:a)',
         "bad inline flags: flags 'a', 'u' and 'L' are incompatible", 4),

        (r'(?-', 'missing flag', 3),
        (r'(?-+', 'missing flag', 3),
        (r'(?-z', 'unknown flag', 3),
        (r'(?-i', 'missing :', 4),
        (r'(?-i)', 'missing :', 4),
        (r'(?-i+', 'missing :', 4),
        (r'(?-iz', 'unknown flag', 4),
        (r'(?i:', 'missing ), unterminated subpattern', 0),
        (r'(?i', 'missing -, : or )', 3),
        (r'(?i+', 'missing -, : or )', 3),
        (r'(?iz', 'unknown flag', 3),
    ]
    for pattern, errmsg, pos in rejected:
        self.checkPatternError(pattern, errmsg, pos)
1637
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001638 def test_bug_6509(self):
1639 # Replacement strings of both types must parse properly.
1640 # all strings
R David Murray44b548d2016-09-08 13:59:53 -04001641 pat = re.compile(r'a(\w)')
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001642 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
1643 pat = re.compile('a(.)')
1644 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
1645 pat = re.compile('..')
1646 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
1647
1648 # all bytes
R David Murray44b548d2016-09-08 13:59:53 -04001649 pat = re.compile(br'a(\w)')
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001650 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
1651 pat = re.compile(b'a(.)')
1652 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
1653 pat = re.compile(b'..')
1654 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
1655
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001656 def test_dealloc(self):
1657 # issue 3299: check for segfault in debug build
1658 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +00001659 # the overflow limit is different on wide and narrow builds and it
1660 # depends on the definition of SRE_CODE (see sre.h).
1661 # 2**128 should be big enough to overflow on both. For smaller values
1662 # a RuntimeError is raised instead of OverflowError.
1663 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001664 self.assertRaises(TypeError, re.finditer, "a", {})
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001665 with self.assertRaises(OverflowError):
Victor Stinner726a57d2016-11-22 23:04:39 +01001666 _sre.compile("abc", 0, [long_overflow], 0, {}, ())
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001667 with self.assertRaises(TypeError):
1668 _sre.compile({}, 0, [], 0, [], [])
Christian Heimes072c0f12008-01-03 23:01:04 +00001669
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001670 def test_search_dot_unicode(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001671 self.assertTrue(re.search("123.*-", '123abc-'))
1672 self.assertTrue(re.search("123.*-", '123\xe9-'))
1673 self.assertTrue(re.search("123.*-", '123\u20ac-'))
1674 self.assertTrue(re.search("123.*-", '123\U0010ffff-'))
1675 self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001676
Ezio Melottidf723e12012-03-13 01:29:48 +02001677 def test_compile(self):
1678 # Test return value when given string and pattern as parameter
1679 pattern = re.compile('random pattern')
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001680 self.assertIsInstance(pattern, re.Pattern)
Ezio Melottidf723e12012-03-13 01:29:48 +02001681 same_pattern = re.compile(pattern)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001682 self.assertIsInstance(same_pattern, re.Pattern)
Ezio Melottidf723e12012-03-13 01:29:48 +02001683 self.assertIs(same_pattern, pattern)
1684 # Test behaviour when not given a string or pattern as parameter
1685 self.assertRaises(TypeError, re.compile, 0)
1686
Antoine Pitroub33941a2012-12-03 20:55:56 +01001687 @bigmemtest(size=_2G, memuse=1)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001688 def test_large_search(self, size):
1689 # Issue #10182: indices were 32-bit-truncated.
1690 s = 'a' * size
1691 m = re.search('$', s)
1692 self.assertIsNotNone(m)
Antoine Pitrou86067c22012-12-03 21:08:43 +01001693 self.assertEqual(m.start(), size)
1694 self.assertEqual(m.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001695
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001696 # The huge memuse is because of re.sub() using a list and a join()
1697 # to create the replacement result.
Antoine Pitroub33941a2012-12-03 20:55:56 +01001698 @bigmemtest(size=_2G, memuse=16 + 2)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001699 def test_large_subn(self, size):
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001700 # Issue #10182: indices were 32-bit-truncated.
1701 s = 'a' * size
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001702 r, n = re.subn('', '', s)
1703 self.assertEqual(r, s)
1704 self.assertEqual(n, size + 1)
1705
Serhiy Storchakac1b59d42012-12-29 23:38:48 +02001706 def test_bug_16688(self):
1707 # Issue 16688: Backreferences make case-insensitive regex fail on
1708 # non-ASCII strings.
1709 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1710 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001711
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001712 def test_repeat_minmax_overflow(self):
1713 # Issue #13169
1714 string = "x" * 100000
1715 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1716 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1717 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1718 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1719 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1720 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1721 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1722 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1723 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1724 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1725 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1726
1727 @cpython_only
1728 def test_repeat_minmax_overflow_maxrepeat(self):
1729 try:
1730 from _sre import MAXREPEAT
1731 except ImportError:
1732 self.skipTest('requires _sre.MAXREPEAT constant')
1733 string = "x" * 100000
1734 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1735 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1736 (0, 100000))
1737 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1738 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1739 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1740 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1741
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    bad_pattern = '(?P=<foo>)'
    message = "bad character in group name '<foo>'"
    self.checkPatternError(bad_pattern, message, 4)
R David Murray26dfaac92013-04-14 13:00:54 -04001746
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    bad_pattern = '(?P<?foo>)'
    message = "bad character in group name '?foo'"
    self.checkPatternError(bad_pattern, message, 4)
R David Murray26dfaac92013-04-14 13:00:54 -04001751
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001752 def test_issue17998(self):
1753 for reps in '*', '+', '?', '{1}':
1754 for mod in '', '?':
1755 pattern = '.' + reps + mod + 'yz'
1756 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1757 ['xyz'], msg=pattern)
1758 pattern = pattern.encode()
1759 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1760 [b'xyz'], msg=pattern)
1761
def test_match_repr(self):
    # repr() of a match object shows its type, span and matched text for
    # str, bytes and other bytes-like subjects.

    def type_parts(match):
        # Module and qualified name of the match type, escaped so they
        # are matched literally when spliced into the expected regex.
        kind = type(match)
        return re.escape(kind.__module__), re.escape(kind.__qualname__)

    for string in '[abracadabra]', S('[abracadabra]'):
        m = re.search(r'(.+)(.*?)\1', string)
        pattern = (r"<(%s\.)?%s object; span=\(1, 12\), "
                   r"match='abracadabra'>" % type_parts(m))
        self.assertRegex(repr(m), pattern)
    for string in (b'[abracadabra]', B(b'[abracadabra]'),
                   bytearray(b'[abracadabra]'),
                   memoryview(b'[abracadabra]')):
        m = re.search(br'(.+)(.*?)\1', string)
        pattern = (r"<(%s\.)?%s object; span=\(1, 12\), "
                   r"match=b'abracadabra'>" % type_parts(m))
        self.assertRegex(repr(m), pattern)

    first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
    # Each expectation is derived from the match it is checked against
    # (the first one used to be built from `second`).
    pattern = (r"<(%s\.)?%s object; span=\(0, 2\), match='aa'>"
               % type_parts(first))
    self.assertRegex(repr(first), pattern)
    pattern = (r"<(%s\.)?%s object; span=\(3, 5\), match='bb'>"
               % type_parts(second))
    self.assertRegex(repr(second), pattern)
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03001787
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001788 def test_zerowidth(self):
1789 # Issues 852532, 1647489, 3262, 25054.
1790 self.assertEqual(re.split(r"\b", "a::bc"), ['', 'a', '::', 'bc', ''])
Serhiy Storchakafbb490f2018-01-04 11:06:13 +02001791 self.assertEqual(re.split(r"\b|:+", "a::bc"), ['', 'a', '', '', 'bc', ''])
1792 self.assertEqual(re.split(r"(?<!\w)(?=\w)|:+", "a::bc"), ['', 'a', '', 'bc'])
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001793 self.assertEqual(re.split(r"(?<=\w)(?!\w)|:+", "a::bc"), ['a', '', 'bc', ''])
1794
1795 self.assertEqual(re.sub(r"\b", "-", "a::bc"), '-a-::-bc-')
Serhiy Storchakafbb490f2018-01-04 11:06:13 +02001796 self.assertEqual(re.sub(r"\b|:+", "-", "a::bc"), '-a---bc-')
1797 self.assertEqual(re.sub(r"(\b|:+)", r"[\1]", "a::bc"), '[]a[][::][]bc[]')
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001798
1799 self.assertEqual(re.findall(r"\b|:+", "a::bc"), ['', '', '::', '', ''])
1800 self.assertEqual(re.findall(r"\b|\w+", "a::bc"),
1801 ['', 'a', '', '', 'bc', ''])
1802
1803 self.assertEqual([m.span() for m in re.finditer(r"\b|:+", "a::bc")],
1804 [(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)])
1805 self.assertEqual([m.span() for m in re.finditer(r"\b|\w+", "a::bc")],
1806 [(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)])
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001807
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001808 def test_bug_2537(self):
1809 # issue 2537: empty submatches
1810 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1811 for inner_op in ('{0,}', '*', '?'):
1812 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1813 m = r.match("xyyzy")
1814 self.assertEqual(m.group(0), "xyy")
1815 self.assertEqual(m.group(1), "")
1816 self.assertEqual(m.group(2), "y")
1817
# Checks the text printed when a pattern is compiled with re.DEBUG against a
# fixed expected dump, and that a second compile prints the same text again.
# NOTE(review): the author/hash/date/line-number prefix on every line looks
# like blame-viewer residue, and the indentation inside the expected dump
# appears to have been collapsed -- restore both from the upstream file before
# relying on this block; the dump is whitespace-sensitive.
Serhiy Storchaka4ab6abf2017-05-14 09:05:13 +03001818 @cpython_only
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001819 def test_debug_flag(self):
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001820 pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
# Capture what compiling with re.DEBUG writes to stdout.
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001821 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001822 re.compile(pat, re.DEBUG)
# maxDiff = None lets a failing assertEqual show the complete diff.
Serhiy Storchaka4ab6abf2017-05-14 09:05:13 +03001823 self.maxDiff = None
# Expected output: the parsed tree first, then the compiled code listing.
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001824 dump = '''\
Serhiy Storchakabe9a4e52016-09-10 00:57:55 +03001825SUBPATTERN 1 0 0
Serhiy Storchakac7f7d382014-11-09 20:48:36 +02001826 LITERAL 46
Serhiy Storchaka821a9d12017-05-14 08:32:33 +03001827BRANCH
1828 IN
1829 LITERAL 99
1830 LITERAL 104
1831OR
1832 LITERAL 112
1833 LITERAL 121
1834GROUPREF_EXISTS 1
1835 AT AT_END
1836ELSE
1837 LITERAL 58
1838 LITERAL 32
Serhiy Storchaka4ab6abf2017-05-14 09:05:13 +03001839
1840 0. INFO 8 0b1 2 5 (to 9)
1841 prefix_skip 0
1842 prefix [0x2e] ('.')
1843 overlap [0]
1844 9: MARK 0
184511. LITERAL 0x2e ('.')
184613. MARK 1
184715. BRANCH 10 (to 26)
184817. IN 6 (to 24)
184919. LITERAL 0x63 ('c')
185021. LITERAL 0x68 ('h')
185123. FAILURE
185224: JUMP 9 (to 34)
185326: branch 7 (to 33)
185427. LITERAL 0x70 ('p')
185529. LITERAL 0x79 ('y')
185631. JUMP 2 (to 34)
185733: FAILURE
185834: GROUPREF_EXISTS 0 6 (to 41)
185937. AT END
186039. JUMP 5 (to 45)
186141: LITERAL 0x3a (':')
186243. LITERAL 0x20 (' ')
186345: SUCCESS
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001864'''
1865 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001866 # Debug output is output again even a second time (bypassing
1867 # the cache -- issue #20426).
1868 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001869 re.compile(pat, re.DEBUG)
1870 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001871
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001872 def test_keyword_parameters(self):
1873 # Issue #20283: Accepting the string keyword parameter.
1874 pat = re.compile(r'(ab)')
1875 self.assertEqual(
1876 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1877 self.assertEqual(
Serhiy Storchakaa537eb42014-03-06 11:36:15 +02001878 pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
1879 self.assertEqual(
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001880 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1881 self.assertEqual(
1882 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1883 self.assertEqual(
1884 pat.split(string='abracadabra', maxsplit=1),
1885 ['', 'ab', 'racadabra'])
1886 self.assertEqual(
1887 pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
1888 (7, 9))
1889
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001890 def test_bug_20998(self):
1891 # Issue #20998: Fullmatch of repeated single character pattern
1892 # with ignore case.
1893 self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
1894
def test_locale_caching(self):
    # Issue #22410
    saved = locale.setlocale(locale.LC_CTYPE)
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved)
    # Both locales are required; skip when either one cannot be set.
    for name in ('en_US.iso88591', 'en_US.utf8'):
        try:
            locale.setlocale(locale.LC_CTYPE, name)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % name)

    # Run the two checks in both orders, starting from an empty cache.
    orderings = (
        (self.check_en_US_iso88591, self.check_en_US_utf8),
        (self.check_en_US_utf8, self.check_en_US_iso88591),
    )
    for first_check, second_check in orderings:
        re.purge()
        first_check()
        second_check()
1911 self.check_en_US_iso88591()
1912
def check_en_US_iso88591(self):
    # Under this locale the bytes 0xC5 and 0xE5 match each other
    # case-insensitively, with flags passed as an argument or inline.
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    pairs = ((b'\xc5\xe5', b'\xc5\xe5'), (b'\xc5', b'\xe5'), (b'\xe5', b'\xc5'))
    for regex, subject in pairs:
        self.assertTrue(re.match(regex, subject, re.L|re.I))
    for regex, subject in pairs:
        self.assertTrue(re.match(b'(?Li)' + regex, subject))
1921
def check_en_US_utf8(self):
    # Under this locale only the identical byte sequence matches; 0xC5
    # and 0xE5 are not treated as a case pair.
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    # Each row is (pattern, subject, whether a match is expected).
    rows = (
        (b'\xc5\xe5', b'\xc5\xe5', True),
        (b'\xc5', b'\xe5', False),
        (b'\xe5', b'\xc5', False),
    )
    for prefix, flags in ((b'', re.L|re.I), (b'(?Li)', 0)):
        for regex, subject, should_match in rows:
            outcome = re.match(prefix + regex, subject, flags)
            if should_match:
                self.assertTrue(outcome)
            else:
                self.assertIsNone(outcome)
1930
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001931 def test_locale_compiled(self):
1932 oldlocale = locale.setlocale(locale.LC_CTYPE)
1933 self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
1934 for loc in 'en_US.iso88591', 'en_US.utf8':
1935 try:
1936 locale.setlocale(locale.LC_CTYPE, loc)
1937 except locale.Error:
1938 # Unsupported locale on this system
1939 self.skipTest('test needs %s locale' % loc)
1940
1941 locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
1942 p1 = re.compile(b'\xc5\xe5', re.L|re.I)
1943 p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I)
1944 p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I)
1945 p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I)
1946 for p in p1, p2, p3:
1947 self.assertTrue(p.match(b'\xc5\xe5'))
1948 self.assertTrue(p.match(b'\xe5\xe5'))
1949 self.assertTrue(p.match(b'\xc5\xc5'))
1950 self.assertIsNone(p4.match(b'\xe5\xc5'))
1951 self.assertIsNone(p4.match(b'\xe5\xe5'))
1952 self.assertIsNone(p4.match(b'\xc5\xc5'))
1953
1954 locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
1955 for p in p1, p2, p3:
1956 self.assertTrue(p.match(b'\xc5\xe5'))
1957 self.assertIsNone(p.match(b'\xe5\xe5'))
1958 self.assertIsNone(p.match(b'\xc5\xc5'))
1959 self.assertTrue(p4.match(b'\xe5\xc5'))
1960 self.assertIsNone(p4.match(b'\xe5\xe5'))
1961 self.assertIsNone(p4.match(b'\xc5\xc5'))
1962
Serhiy Storchakaad446d52014-11-10 13:49:00 +02001963 def test_error(self):
1964 with self.assertRaises(re.error) as cm:
1965 re.compile('(\u20ac))')
1966 err = cm.exception
1967 self.assertIsInstance(err.pattern, str)
1968 self.assertEqual(err.pattern, '(\u20ac))')
1969 self.assertEqual(err.pos, 3)
1970 self.assertEqual(err.lineno, 1)
1971 self.assertEqual(err.colno, 4)
1972 self.assertIn(err.msg, str(err))
1973 self.assertIn(' at position 3', str(err))
1974 self.assertNotIn(' at position 3', err.msg)
1975 # Bytes pattern
1976 with self.assertRaises(re.error) as cm:
1977 re.compile(b'(\xa4))')
1978 err = cm.exception
1979 self.assertIsInstance(err.pattern, bytes)
1980 self.assertEqual(err.pattern, b'(\xa4))')
1981 self.assertEqual(err.pos, 3)
1982 # Multiline pattern
1983 with self.assertRaises(re.error) as cm:
1984 re.compile("""
1985 (
1986 abc
1987 )
1988 )
1989 (
1990 """, re.VERBOSE)
1991 err = cm.exception
1992 self.assertEqual(err.pos, 77)
1993 self.assertEqual(err.lineno, 5)
1994 self.assertEqual(err.colno, 17)
1995 self.assertIn(err.msg, str(err))
1996 self.assertIn(' at position 77', str(err))
1997 self.assertIn('(line 5, column 17)', str(err))
1998
def test_misc_errors(self):
    # Each row is (pattern, expected message, expected error position).
    rows = (
        (r'(', 'missing ), unterminated subpattern', 0),
        (r'((a|b)', 'missing ), unterminated subpattern', 0),
        (r'(a|b))', 'unbalanced parenthesis', 5),
        (r'(?P', 'unexpected end of pattern', 3),
        (r'(?z)', 'unknown extension ?z', 1),
        (r'(?iz)', 'unknown flag', 3),
        (r'(?i', 'missing -, : or )', 3),
        (r'(?#abc', 'missing ), unterminated comment', 0),
        (r'(?<', 'unexpected end of pattern', 3),
        (r'(?<>)', 'unknown extension ?<>', 1),
        (r'(?', 'unexpected end of pattern', 2),
    )
    for bad_pattern, message, position in rows:
        self.checkPatternError(bad_pattern, message, position)
2011
Victor Stinner8bf43e62016-11-14 12:38:43 +01002012 def test_enum(self):
2013 # Issue #28082: Check that str(flag) returns a human readable string
2014 # instead of an integer
2015 self.assertIn('ASCII', str(re.A))
2016 self.assertIn('DOTALL', str(re.S))
2017
Victor Stinnerb44fb122016-11-21 16:35:08 +01002018 def test_pattern_compare(self):
2019 pattern1 = re.compile('abc', re.IGNORECASE)
2020
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002021 # equal to itself
2022 self.assertEqual(pattern1, pattern1)
2023 self.assertFalse(pattern1 != pattern1)
2024
Victor Stinnerb44fb122016-11-21 16:35:08 +01002025 # equal
2026 re.purge()
2027 pattern2 = re.compile('abc', re.IGNORECASE)
2028 self.assertEqual(hash(pattern2), hash(pattern1))
2029 self.assertEqual(pattern2, pattern1)
2030
2031 # not equal: different pattern
2032 re.purge()
2033 pattern3 = re.compile('XYZ', re.IGNORECASE)
2034 # Don't test hash(pattern3) != hash(pattern1) because there is no
2035 # warranty that hash values are different
2036 self.assertNotEqual(pattern3, pattern1)
2037
2038 # not equal: different flag (flags=0)
2039 re.purge()
2040 pattern4 = re.compile('abc')
2041 self.assertNotEqual(pattern4, pattern1)
2042
2043 # only == and != comparison operators are supported
2044 with self.assertRaises(TypeError):
2045 pattern1 < pattern2
2046
2047 def test_pattern_compare_bytes(self):
2048 pattern1 = re.compile(b'abc')
2049
2050 # equal: test bytes patterns
2051 re.purge()
2052 pattern2 = re.compile(b'abc')
2053 self.assertEqual(hash(pattern2), hash(pattern1))
2054 self.assertEqual(pattern2, pattern1)
2055
2056 # not equal: pattern of a different types (str vs bytes),
2057 # comparison must not raise a BytesWarning
2058 re.purge()
2059 pattern3 = re.compile('abc')
2060 with warnings.catch_warnings():
2061 warnings.simplefilter('error', BytesWarning)
2062 self.assertNotEqual(pattern3, pattern1)
2063
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02002064 def test_bug_29444(self):
2065 s = bytearray(b'abcdefgh')
2066 m = re.search(b'[a-h]+', s)
2067 m2 = re.search(b'[e-h]+', s)
2068 self.assertEqual(m.group(), b'abcdefgh')
2069 self.assertEqual(m2.group(), b'efgh')
2070 s[:] = b'xyz'
2071 self.assertEqual(m.group(), b'xyz')
2072 self.assertEqual(m2.group(), b'')
2073
animalize4a7f44a2019-02-18 21:26:37 +08002074 def test_bug_34294(self):
2075 # Issue 34294: wrong capturing groups
2076
2077 # exists since Python 2
2078 s = "a\tx"
2079 p = r"\b(?=(\t)|(x))x"
2080 self.assertEqual(re.search(p, s).groups(), (None, 'x'))
2081
2082 # introduced in Python 3.7.0
2083 s = "ab"
2084 p = r"(?=(.)(.)?)"
2085 self.assertEqual(re.findall(p, s),
2086 [('a', 'b'), ('b', '')])
2087 self.assertEqual([m.groups() for m in re.finditer(p, s)],
2088 [('a', 'b'), ('b', None)])
2089
2090 # test-cases provided by issue34294, introduced in Python 3.7.0
2091 p = r"(?=<(?P<tag>\w+)/?>(?:(?P<text>.+?)</(?P=tag)>)?)"
2092 s = "<test><foo2/></test>"
2093 self.assertEqual(re.findall(p, s),
2094 [('test', '<foo2/>'), ('foo2', '')])
2095 self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
2096 [{'tag': 'test', 'text': '<foo2/>'},
2097 {'tag': 'foo2', 'text': None}])
2098 s = "<test>Hello</test><foo/>"
2099 self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
2100 [{'tag': 'test', 'text': 'Hello'},
2101 {'tag': 'foo', 'text': None}])
2102 s = "<test>Hello</test><foo/><foo/>"
2103 self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
2104 [{'tag': 'test', 'text': 'Hello'},
2105 {'tag': 'foo', 'text': None},
2106 {'tag': 'foo', 'text': None}])
2107
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02002108
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02002109class PatternReprTests(unittest.TestCase):
2110 def check(self, pattern, expected):
2111 self.assertEqual(repr(re.compile(pattern)), expected)
2112
2113 def check_flags(self, pattern, flags, expected):
2114 self.assertEqual(repr(re.compile(pattern, flags)), expected)
2115
2116 def test_without_flags(self):
2117 self.check('random pattern',
2118 "re.compile('random pattern')")
2119
2120 def test_single_flag(self):
2121 self.check_flags('random pattern', re.IGNORECASE,
2122 "re.compile('random pattern', re.IGNORECASE)")
2123
2124 def test_multiple_flags(self):
2125 self.check_flags('random pattern', re.I|re.S|re.X,
2126 "re.compile('random pattern', "
2127 "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
2128
2129 def test_unicode_flag(self):
2130 self.check_flags('random pattern', re.U,
2131 "re.compile('random pattern')")
2132 self.check_flags('random pattern', re.I|re.S|re.U,
2133 "re.compile('random pattern', "
2134 "re.IGNORECASE|re.DOTALL)")
2135
2136 def test_inline_flags(self):
2137 self.check('(?i)pattern',
2138 "re.compile('(?i)pattern', re.IGNORECASE)")
2139
2140 def test_unknown_flags(self):
2141 self.check_flags('random pattern', 0x123000,
2142 "re.compile('random pattern', 0x123000)")
2143 self.check_flags('random pattern', 0x123000|re.I,
2144 "re.compile('random pattern', re.IGNORECASE|0x123000)")
2145
2146 def test_bytes(self):
2147 self.check(b'bytes pattern',
2148 "re.compile(b'bytes pattern')")
2149 self.check_flags(b'bytes pattern', re.A,
2150 "re.compile(b'bytes pattern', re.ASCII)")
2151
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02002152 def test_locale(self):
2153 self.check_flags(b'bytes pattern', re.L,
2154 "re.compile(b'bytes pattern', re.LOCALE)")
2155
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02002156 def test_quotes(self):
2157 self.check('random "double quoted" pattern',
2158 '''re.compile('random "double quoted" pattern')''')
2159 self.check("random 'single quoted' pattern",
2160 '''re.compile("random 'single quoted' pattern")''')
2161 self.check('''both 'single' and "double" quotes''',
2162 '''re.compile('both \\'single\\' and "double" quotes')''')
2163
2164 def test_long_pattern(self):
2165 pattern = 'Very %spattern' % ('long ' * 1000)
2166 r = repr(re.compile(pattern))
2167 self.assertLess(len(r), 300)
2168 self.assertEqual(r[:30], "re.compile('Very long long lon")
2169 r = repr(re.compile(pattern, re.I))
2170 self.assertLess(len(r), 300)
2171 self.assertEqual(r[:30], "re.compile('Very long long lon")
2172 self.assertEqual(r[-16:], ", re.IGNORECASE)")
2173
Serhiy Storchaka14a0e162019-05-31 10:39:47 +03002174 def test_flags_repr(self):
2175 self.assertEqual(repr(re.I), "re.IGNORECASE")
2176 self.assertEqual(repr(re.I|re.S|re.X),
2177 "re.IGNORECASE|re.DOTALL|re.VERBOSE")
2178 self.assertEqual(repr(re.I|re.S|re.X|(1<<20)),
2179 "re.IGNORECASE|re.DOTALL|re.VERBOSE|0x100000")
Ethan Furman7aaeb2a2021-01-25 14:26:19 -08002180 self.assertEqual(
2181 repr(~re.I),
2182 "re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.DOTALL|re.VERBOSE|re.TEMPLATE|re.DEBUG")
Serhiy Storchaka14a0e162019-05-31 10:39:47 +03002183 self.assertEqual(repr(~(re.I|re.S|re.X)),
Ethan Furman7aaeb2a2021-01-25 14:26:19 -08002184 "re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.TEMPLATE|re.DEBUG")
Serhiy Storchaka14a0e162019-05-31 10:39:47 +03002185 self.assertEqual(repr(~(re.I|re.S|re.X|(1<<20))),
Ethan Furman7aaeb2a2021-01-25 14:26:19 -08002186 "re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.TEMPLATE|re.DEBUG|0xffe00")
Serhiy Storchaka14a0e162019-05-31 10:39:47 +03002187
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02002188
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02002189class ImplementationTest(unittest.TestCase):
2190 """
2191 Test implementation details of the re module.
2192 """
2193
Erlend Egeberg Aasland5daf70b2021-04-29 08:47:11 +02002194 @cpython_only
2195 def test_immutable(self):
2196 # bpo-43908: check that re types are immutable
2197 with self.assertRaises(TypeError):
2198 re.Match.foo = 1
2199 with self.assertRaises(TypeError):
2200 re.Pattern.foo = 1
2201 with self.assertRaises(TypeError):
2202 pat = re.compile("")
2203 tp = type(pat.scanner(""))
2204 tp.foo = 1
2205
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02002206 def test_overlap_table(self):
2207 f = sre_compile._generate_overlap_table
2208 self.assertEqual(f(""), [])
2209 self.assertEqual(f("a"), [0])
2210 self.assertEqual(f("abcd"), [0, 0, 0, 0])
2211 self.assertEqual(f("aaaa"), [0, 1, 2, 3])
2212 self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
2213 self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
2214
Erlend Egeberg Aaslanda6109ef2020-11-20 13:36:23 +01002215 def test_signedness(self):
2216 self.assertGreaterEqual(sre_compile.MAXREPEAT, 0)
2217 self.assertGreaterEqual(sre_compile.MAXGROUPS, 0)
2218
Erlend Egeberg Aasland9746cda2021-04-30 16:04:57 +02002219 @cpython_only
2220 def test_disallow_instantiation(self):
2221 # Ensure that the type disallows instantiation (bpo-43916)
Erlend Egeberg Aasland0a3452e2021-06-24 01:46:25 +02002222 check_disallow_instantiation(self, re.Match)
2223 check_disallow_instantiation(self, re.Pattern)
Erlend Egeberg Aasland9746cda2021-04-30 16:04:57 +02002224 pat = re.compile("")
Erlend Egeberg Aasland0a3452e2021-06-24 01:46:25 +02002225 check_disallow_instantiation(self, type(pat.scanner("")))
Erlend Egeberg Aasland9746cda2021-04-30 16:04:57 +02002226
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02002227
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002228class ExternalTests(unittest.TestCase):
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002229
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002230 def test_re_benchmarks(self):
2231 're_tests benchmarks'
2232 from test.re_tests import benchmarks
2233 for pattern, s in benchmarks:
2234 with self.subTest(pattern=pattern, string=s):
2235 p = re.compile(pattern)
2236 self.assertTrue(p.search(s))
2237 self.assertTrue(p.match(s))
2238 self.assertTrue(p.fullmatch(s))
2239 s2 = ' '*10000 + s + ' '*10000
2240 self.assertTrue(p.search(s2))
2241 self.assertTrue(p.match(s2, 10000))
2242 self.assertTrue(p.match(s2, 10000, 10000 + len(s)))
2243 self.assertTrue(p.fullmatch(s2, 10000, 10000 + len(s)))
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002244
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002245 def test_re_tests(self):
2246 're_tests test suite'
Victor Stinner57572b12020-04-30 01:48:37 +02002247 from test.re_tests import tests, FAIL, SYNTAX_ERROR
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002248 for t in tests:
2249 pattern = s = outcome = repl = expected = None
2250 if len(t) == 5:
2251 pattern, s, outcome, repl, expected = t
2252 elif len(t) == 3:
2253 pattern, s, outcome = t
Guido van Rossum41360a41998-03-26 19:42:58 +00002254 else:
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002255 raise ValueError('Test tuples should have 3 or 5 fields', t)
2256
2257 with self.subTest(pattern=pattern, string=s):
2258 if outcome == SYNTAX_ERROR: # Expected a syntax error
2259 with self.assertRaises(re.error):
2260 re.compile(pattern)
2261 continue
2262
2263 obj = re.compile(pattern)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002264 result = obj.search(s)
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002265 if outcome == FAIL:
2266 self.assertIsNone(result, 'Succeeded incorrectly')
2267 continue
2268
2269 with self.subTest():
2270 self.assertTrue(result, 'Failed incorrectly')
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002271 # Matched, as expected, so now we compute the
2272 # result string and compare it to our expected result.
2273 start, end = result.span(0)
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002274 vardict = {'found': result.group(0),
2275 'groups': result.group(),
2276 'flags': result.re.flags}
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002277 for i in range(1, 100):
2278 try:
2279 gi = result.group(i)
2280 # Special hack because else the string concat fails:
2281 if gi is None:
2282 gi = "None"
2283 except IndexError:
2284 gi = "Error"
2285 vardict['g%d' % i] = gi
2286 for i in result.re.groupindex.keys():
2287 try:
2288 gi = result.group(i)
2289 if gi is None:
2290 gi = "None"
2291 except IndexError:
2292 gi = "Error"
2293 vardict[i] = gi
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002294 self.assertEqual(eval(repl, vardict), expected,
2295 'grouping error')
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002296
Antoine Pitrou22628c42008-07-22 17:53:22 +00002297 # Try the match with both pattern and string converted to
2298 # bytes, and check that it still succeeds.
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002299 try:
Antoine Pitrou22628c42008-07-22 17:53:22 +00002300 bpat = bytes(pattern, "ascii")
2301 bs = bytes(s, "ascii")
2302 except UnicodeEncodeError:
2303 # skip non-ascii tests
2304 pass
2305 else:
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002306 with self.subTest('bytes pattern match'):
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02002307 obj = re.compile(bpat)
2308 self.assertTrue(obj.search(bs))
2309
2310 # Try the match with LOCALE enabled, and check that it
2311 # still succeeds.
2312 with self.subTest('locale-sensitive match'):
2313 obj = re.compile(bpat, re.LOCALE)
2314 result = obj.search(bs)
2315 if result is None:
2316 print('=== Fails on locale-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00002317
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002318 # Try the match with the search area limited to the extent
2319 # of the match and see if it still succeeds. \B will
2320 # break (because it won't match at the end or start of a
2321 # string), so we'll ignore patterns that feature it.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002322 if (pattern[:2] != r'\B' and pattern[-2:] != r'\B'
2323 and result is not None):
2324 with self.subTest('range-limited match'):
2325 obj = re.compile(pattern)
2326 self.assertTrue(obj.search(s, start, end + 1))
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00002327
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002328 # Try the match with IGNORECASE enabled, and check that it
2329 # still succeeds.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002330 with self.subTest('case-insensitive match'):
2331 obj = re.compile(pattern, re.IGNORECASE)
2332 self.assertTrue(obj.search(s))
Guido van Rossumdfa67901997-12-08 17:12:06 +00002333
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002334 # Try the match with UNICODE locale enabled, and check
2335 # that it still succeeds.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002336 with self.subTest('unicode-sensitive match'):
2337 obj = re.compile(pattern, re.UNICODE)
2338 self.assertTrue(obj.search(s))
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00002339
Gregory P. Smith5a631832010-07-27 05:31:29 +00002340
# Run the whole module's tests when it is executed as a script.
if __name__ == '__main__':
    unittest.main()