blob: b3b29f847e619ea2112942101d04cbe36716bd63 [file] [log] [blame]
Victor Stinnerd6debb22017-03-27 16:05:26 +02001from test.support import (gc_collect, bigmemtest, _2G,
2 cpython_only, captured_stdout)
Serhiy Storchaka4659cc02014-10-31 00:53:49 +02003import locale
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02005import sre_compile
Ezio Melottid2114eb2011-03-25 14:08:44 +02006import string
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02007import unittest
Victor Stinnerb44fb122016-11-21 16:35:08 +01008import warnings
9from re import Scanner
Raymond Hettinger027bb632004-05-31 03:09:25 +000010from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000011
# Misc tests from Tim Peters' re.doc
13
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
17
class S(str):
    """str subclass whose indexing and slicing results are S instances again."""

    def __getitem__(self, index):
        piece = super().__getitem__(index)
        return S(piece)
21
class B(bytes):
    """bytes subclass whose indexing and slicing results are wrapped in B again."""

    def __getitem__(self, index):
        piece = super().__getitem__(index)
        return B(piece)
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000027
def assertTypedEqual(self, actual, expect, msg=None):
    """Assert that *actual* equals *expect* and that leaf types are identical.

    Equality is checked first with assertEqual().  Then, wherever *expect*
    is a tuple or list, the two values are walked pairwise; for every
    other expected value the exact type of the actual value must be the
    very same type object.
    """
    self.assertEqual(actual, expect, msg)

    def check_types(got, wanted):
        if not isinstance(wanted, (tuple, list)):
            self.assertIs(type(got), type(wanted), msg)
            return
        for got_item, wanted_item in zip(got, wanted):
            check_types(got_item, wanted_item)

    check_types(actual, expect)
37
def checkPatternError(self, pattern, errmsg, pos=None):
    """Assert that compiling *pattern* raises re.error with message *errmsg*.

    When *pos* is given, the error's ``pos`` attribute must equal it too.
    """
    with self.assertRaises(re.error) as caught:
        re.compile(pattern)
    with self.subTest(pattern=pattern):
        error = caught.exception
        self.assertEqual(error.msg, errmsg)
        if pos is None:
            return
        self.assertEqual(error.pos, pos)
46
def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
    """Assert that re.sub(pattern, repl, string) raises re.error with *errmsg*.

    When *pos* is given, the error's ``pos`` attribute must equal it too.
    """
    with self.assertRaises(re.error) as caught:
        re.sub(pattern, repl, string)
    with self.subTest(pattern=pattern, repl=repl):
        error = caught.exception
        self.assertEqual(error.msg, errmsg)
        if pos is None:
            return
        self.assertEqual(error.pos, pos)
55
def test_keep_buffer(self):
    # See bug 14212
    # While a finditer() iterator over the bytearray is alive, resizing
    # the bytearray must be refused with BufferError.
    padding = b'x' * 400
    buf = bytearray(b'x')
    matches = re.finditer(b'a', buf)
    with self.assertRaises(BufferError):
        buf.extend(padding)
    list(matches)
    # Once the iterator is gone and collected, resizing is expected to work.
    del matches
    gc_collect()
    buf.extend(padding)
66
Raymond Hettinger027bb632004-05-31 03:09:25 +000067 def test_weakref(self):
68 s = 'QabbbcR'
69 x = re.compile('ab+c')
70 y = proxy(x)
71 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
72
Skip Montanaro8ed06da2003-04-24 19:43:18 +000073 def test_search_star_plus(self):
74 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
75 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
76 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
77 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030078 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000079 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
80 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
81 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
82 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030083 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000084
def bump_num(self, matchobj):
    """Return the integer matched by *matchobj*, plus one, as a string."""
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000088
def test_basic_re_sub(self):
    # The result must be a plain str/bytes object, whatever str-like or
    # bytes-like types are passed for the replacement and the subject.
    self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
    self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
    self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
    self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
    for y in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')

    # Callable replacements, with and without a substitution limit.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
                     '9.3 -3 23x99y')

    # Escapes are processed in template strings, but not in the value
    # returned by a callable replacement.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Named and numbered group references in templates.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    # Known character escapes in templates.
    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
    # A backslash followed by any of these letters must be rejected.
    for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
        with self.subTest(c):
            with self.assertRaises(re.error):
                # Only the call itself: an assertEqual() wrapped around it
                # could never run, because the call is expected to raise.
                re.sub('a', '\\' + c, 'a')

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +0000130
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000131 def test_bug_449964(self):
132 # fails for group followed by other escape
R David Murray44b548d2016-09-08 13:59:53 -0400133 self.assertEqual(re.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000134 'xx\bxx\b')
135
136 def test_bug_449000(self):
137 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000138 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
139 'abc\ndef\n')
140 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
141 'abc\ndef\n')
142 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
143 'abc\ndef\n')
144 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
145 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +0000146
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000147 def test_bug_1661(self):
148 # Verify that flags do not get silently ignored with compiled patterns
149 pattern = re.compile('.')
150 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
151 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
152 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
153 self.assertRaises(ValueError, re.compile, pattern, re.I)
154
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000155 def test_bug_3629(self):
156 # A regex that triggered a bug in the sre-code validator
157 re.compile("(?P<quote>)(?(quote))")
158
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # Templates that must be read as octal escapes, optionally followed
    # by literal characters.
    octal_cases = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),
        (r'\377', '\377'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),
    ]
    for template, expected in octal_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Octal values above 0o377 are rejected.
    out_of_range = [
        (r'\400', r'octal escape value \400 outside of '
                  r'range 0-0o377'),
        (r'\777', r'octal escape value \777 outside of '
                  r'range 0-0o377'),
    ]
    for template, message in out_of_range:
        self.checkTemplateError('x', template, 'x', message, 0)

    # Everything else is a reference to a group the pattern lacks.
    bad_references = [
        (r'\1', 'x', 'invalid group reference 1'),
        (r'\8', 'x', 'invalid group reference 8'),
        (r'\9', 'x', 'invalid group reference 9'),
        (r'\11', 'x', 'invalid group reference 11'),
        (r'\18', 'x', 'invalid group reference 18'),
        (r'\1a', 'x', 'invalid group reference 1'),
        (r'\90', 'x', 'invalid group reference 90'),
        (r'\99', 'x', 'invalid group reference 99'),
        (r'\118', 'x', 'invalid group reference 11'),
        (r'\11a', 'x', 'invalid group reference 11'),
        (r'\181', 'x', 'invalid group reference 18'),
        (r'\800', 'x', 'invalid group reference 80'),
        (r'\8', '', 'invalid group reference 8'),
    ]
    for template, subject, message in bad_references:
        self.checkTemplateError('x', template, subject, message, 1)

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    ten_groups_then_one = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(ten_groups_then_one, r'\118', 'xyz'),
                     'xz8')
    self.assertEqual(re.sub(ten_groups_then_one, r'\11a', 'xyz'),
                     'xza')
206
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000207 def test_qualified_re_sub(self):
208 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300209 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Victor Stinner55e614a2014-10-29 16:58:59 +0100210 self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000211
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000212 def test_bug_114660(self):
213 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
214 'hello there')
215
216 def test_bug_462270(self):
217 # Test for empty sub() behaviour, see SF bug #462270
218 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
219 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
220
def test_symbolic_groups(self):
    # Well-formed named groups, back-references and conditionals.
    for valid in (r'(?P<a>x)(?P=a)(?(a)y)',
                  r'(?P<a1>x)(?P=a1)(?(a1)y)',
                  r'(?P<a1>x)\1(?(1)y)'):
        re.compile(valid)

    # Malformed patterns: (pattern, message[, position]).
    invalid = [
        (r'(?P<a>)(?P<a>)',
         "redefinition of group name 'a' as group 2; "
         "was group 1"),
        (r'(?P<a>(?P=a))',
         "cannot refer to an open group", 10),
        (r'(?Pxy)', 'unknown extension ?Px'),
        (r'(?P<a>)(?P=a', 'missing ), unterminated name', 11),
        (r'(?P=', 'missing group name', 4),
        (r'(?P=)', 'missing group name', 4),
        (r'(?P=1)', "bad character in group name '1'", 4),
        (r'(?P=a)', "unknown group name 'a'"),
        (r'(?P=a1)', "unknown group name 'a1'"),
        (r'(?P=a.)', "bad character in group name 'a.'", 4),
        (r'(?P<)', 'missing >, unterminated name', 4),
        (r'(?P<a', 'missing >, unterminated name', 4),
        (r'(?P<', 'missing group name', 4),
        (r'(?P<>)', 'missing group name', 4),
        (r'(?P<1>)', "bad character in group name '1'", 4),
        (r'(?P<a.>)', "bad character in group name 'a.'", 4),
        (r'(?(', 'missing group name', 3),
        (r'(?())', 'missing group name', 3),
        (r'(?(a))', "unknown group name 'a'", 3),
        (r'(?(-1))', "bad character in group name '-1'", 3),
        (r'(?(1a))', "bad character in group name '1a'", 3),
        (r'(?(a.))', "bad character in group name 'a.'", 3),
    ]
    for case in invalid:
        self.checkPatternError(*case)

    # New valid/invalid identifiers in Python 3
    re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
    re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
    self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)

    # Support > 100 groups.
    alternatives = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
    pat = '(?:%s)(?(200)z|t)' % alternatives
    self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200258
def test_symbolic_refs(self):
    # Malformed or unknown group references in a replacement template:
    # (template, message, position).
    named = '(?P<a>x)'
    broken_templates = [
        (r'\g<a', 'missing >, unterminated name', 3),
        (r'\g<', 'missing group name', 3),
        (r'\g', 'missing <', 2),
        (r'\g<a a>', "bad character in group name 'a a'", 3),
        (r'\g<>', 'missing group name', 3),
        (r'\g<1a1>', "bad character in group name '1a1'", 3),
        (r'\g<2>', 'invalid group reference 2', 3),
        (r'\2', 'invalid group reference 2', 1),
    ]
    for template, message, position in broken_templates:
        self.checkTemplateError(named, template, 'xx', message, position)

    with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
        re.sub(named, r'\g<ab>', 'xx')

    # A group that exists but did not take part in the match expands to
    # the empty string.
    either = '(?P<a>x)|(?P<b>y)'
    self.assertEqual(re.sub(either, r'\g<b>', 'xx'), '')
    self.assertEqual(re.sub(either, r'\2', 'xx'), '')
    self.checkTemplateError(named, r'\g<-1>', 'xx',
                            "bad character in group name '-1'", 3)

    # New valid/invalid identifiers in Python 3
    self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
    self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
    self.checkTemplateError(named, r'\g<©>', 'xx',
                            "bad character in group name '©'", 3)

    # Support > 100 groups.
    pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
    self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000289
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000290 def test_re_subn(self):
291 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
292 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
293 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
294 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300295 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Victor Stinner55e614a2014-10-29 16:58:59 +0100296 self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000297
def test_re_split(self):
    # str subjects: results must be plain str pieces.
    for subject in ":a:b::c", S(":a:b::c"):
        for sep, pieces in [
            (":", ['', 'a', 'b', '', 'c']),
            (":+", ['', 'a', 'b', 'c']),
            ("(:+)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ]:
            self.assertTypedEqual(re.split(sep, subject), pieces)

    # bytes-like subjects: results must be plain bytes pieces.
    for subject in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                    memoryview(b":a:b::c")):
        for sep, pieces in [
            (b":", [b'', b'a', b'b', b'', b'c']),
            (b":+", [b'', b'a', b'b', b'c']),
            (b"(:+)", [b'', b':', b'a', b':', b'b', b'::', b'c']),
        ]:
            self.assertTypedEqual(re.split(sep, subject), pieces)

    # Non-ASCII pieces of different code point widths.
    for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                    "\U0001d49c\U0001d49e\U0001d4b5"):
        subject = ":%s:%s::%s" % (a, b, c)
        self.assertEqual(re.split(":", subject), ['', a, b, '', c])
        self.assertEqual(re.split(":+", subject), ['', a, b, c])
        self.assertEqual(re.split("(:+)", subject),
                         ['', ':', a, ':', b, '::', c])

    # Capturing versus non-capturing separators.
    for sep, pieces in [
        ("(?::+)", ['', 'a', 'b', 'c']),
        ("(:)+", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]:
        self.assertEqual(re.split(sep, ":a:b::c"), pieces)

    # Separators that can match the empty string must warn.
    for sep, expected in [
        (':*', ['', 'a', 'b', 'c']),
        ('(?::*)', ['', 'a', 'b', 'c']),
        ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
        ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
    ]:
        with self.subTest(sep=sep):
            with self.assertWarns(FutureWarning):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

    # Separators that only ever match the empty string must be rejected.
    for sep, expected in [
        ('', [':a:b::c']),
        (r'\b', [':a:b::c']),
        (r'(?=:)', [':a:b::c']),
        (r'(?<=:)', [':a:b::c']),
    ]:
        with self.subTest(sep=sep):
            with self.assertRaises(ValueError):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
350
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000351 def test_qualified_re_split(self):
Serhiy Storchakab02f8fc2016-09-25 20:36:23 +0300352 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
Victor Stinner55e614a2014-10-29 16:58:59 +0100353 self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
354 self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
355 self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000356 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200357 self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000358 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200359 with self.assertWarns(FutureWarning):
360 self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
361 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000362
def test_re_findall(self):
    self.assertEqual(re.findall(":+", "abc"), [])

    # str subjects: results must be plain str objects.
    for subject in "a:b::c:::d", S("a:b::c:::d"):
        for pattern, found in [
            (":+", [":", "::", ":::"]),
            ("(:+)", [":", "::", ":::"]),
            ("(:)(:*)", [(":", ""), (":", ":"), (":", "::")]),
        ]:
            self.assertTypedEqual(re.findall(pattern, subject), found)

    # bytes-like subjects: results must be plain bytes objects.
    for subject in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
                    memoryview(b"a:b::c:::d")):
        for pattern, found in [
            (b":+", [b":", b"::", b":::"]),
            (b"(:+)", [b":", b"::", b":::"]),
            (b"(:)(:*)", [(b":", b""), (b":", b":"), (b":", b"::")]),
        ]:
            self.assertTypedEqual(re.findall(pattern, subject), found)

    # Non-ASCII characters of different code point widths.
    for x in ("\xe0", "\u0430", "\U0001d49c"):
        doubled = x * 2
        tripled = x * 3
        subject = "a%sb%sc%sd" % (x, doubled, tripled)
        self.assertEqual(re.findall("%s+" % x, subject), [x, doubled, tripled])
        self.assertEqual(re.findall("(%s+)" % x, subject), [x, doubled, tripled])
        self.assertEqual(re.findall("(%s)(%s*)" % (x, x), subject),
                         [(x, ""), (x, x), (x, doubled)])
Guido van Rossum49946571997-07-18 04:26:25 +0000388
Skip Montanaro5ba00542003-04-25 16:00:14 +0000389 def test_bug_117612(self):
390 self.assertEqual(re.findall(r"(a|(b))", "aba"),
391 [("a", ""),("b", "b"),("a", "")])
392
def test_re_match(self):
    # str subjects.
    for subject in 'a', S('a'):
        self.assertEqual(re.match('a', subject).groups(), ())
        grouped = '(a)'
        self.assertEqual(re.match(grouped, subject).groups(), ('a',))
        self.assertEqual(re.match(grouped, subject).group(0), 'a')
        self.assertEqual(re.match(grouped, subject).group(1), 'a')
        self.assertEqual(re.match(grouped, subject).group(1, 1), ('a', 'a'))

    # bytes-like subjects.
    for subject in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
        self.assertEqual(re.match(b'a', subject).groups(), ())
        grouped = b'(a)'
        self.assertEqual(re.match(grouped, subject).groups(), (b'a',))
        self.assertEqual(re.match(grouped, subject).group(0), b'a')
        self.assertEqual(re.match(grouped, subject).group(1), b'a')
        self.assertEqual(re.match(grouped, subject).group(1, 1), (b'a', b'a'))

    # Non-ASCII characters of different code point widths.
    for a in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.match(a, a).groups(), ())
        grouped = '(%s)' % a
        self.assertEqual(re.match(grouped, a).groups(), (a,))
        self.assertEqual(re.match(grouped, a).group(0), a)
        self.assertEqual(re.match(grouped, a).group(1), a)
        self.assertEqual(re.match(grouped, a).group(1, 1), (a, a))

    # Groups that did not take part in the match are None by default,
    # or the default passed to groups().
    pat = re.compile('((a)|(b))(c)?')
    for subject, groups in [
        ('a', ('a', 'a', None, None)),
        ('b', ('b', None, 'b', None)),
        ('ac', ('a', 'a', None, 'c')),
        ('bc', ('b', None, 'b', 'c')),
    ]:
        self.assertEqual(pat.match(subject).groups(), groups)
    self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

    # Groups can be addressed by number, by name, or a mix of both.
    pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                     (None, 'b', None))
    self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000425
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +0300426 def test_group(self):
427 class Index:
428 def __init__(self, value):
429 self.value = value
430 def __index__(self):
431 return self.value
432 # A single group
433 m = re.match('(a)(b)', 'ab')
434 self.assertEqual(m.group(), 'ab')
435 self.assertEqual(m.group(0), 'ab')
436 self.assertEqual(m.group(1), 'a')
437 self.assertEqual(m.group(Index(1)), 'a')
438 self.assertRaises(IndexError, m.group, -1)
439 self.assertRaises(IndexError, m.group, 3)
440 self.assertRaises(IndexError, m.group, 1<<1000)
441 self.assertRaises(IndexError, m.group, Index(1<<1000))
442 self.assertRaises(IndexError, m.group, 'x')
443 # Multiple groups
444 self.assertEqual(m.group(2, 1), ('b', 'a'))
445 self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a'))
446
Eric V. Smith605bdae2016-09-11 08:55:43 -0400447 def test_match_getitem(self):
448 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
449
450 m = pat.match('a')
451 self.assertEqual(m['a1'], 'a')
452 self.assertEqual(m['b2'], None)
453 self.assertEqual(m['c3'], None)
454 self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None')
455 self.assertEqual(m[0], 'a')
456 self.assertEqual(m[1], 'a')
457 self.assertEqual(m[2], None)
458 self.assertEqual(m[3], None)
459 with self.assertRaisesRegex(IndexError, 'no such group'):
460 m['X']
461 with self.assertRaisesRegex(IndexError, 'no such group'):
462 m[-1]
463 with self.assertRaisesRegex(IndexError, 'no such group'):
464 m[4]
465 with self.assertRaisesRegex(IndexError, 'no such group'):
466 m[0, 1]
467 with self.assertRaisesRegex(IndexError, 'no such group'):
468 m[(0,)]
469 with self.assertRaisesRegex(IndexError, 'no such group'):
470 m[(0, 1)]
471 with self.assertRaisesRegex(KeyError, 'a2'):
472 'a1={a2}'.format_map(m)
473
474 m = pat.match('ac')
475 self.assertEqual(m['a1'], 'a')
476 self.assertEqual(m['b2'], None)
477 self.assertEqual(m['c3'], 'c')
478 self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c')
479 self.assertEqual(m[0], 'ac')
480 self.assertEqual(m[1], 'a')
481 self.assertEqual(m[2], None)
482 self.assertEqual(m[3], 'c')
483
484 # Cannot assign.
485 with self.assertRaises(TypeError):
486 m[0] = 1
487
488 # No len().
489 self.assertRaises(TypeError, len, m)
490
def test_re_fullmatch(self):
    # Issue 16203: Proposal: add re.fullmatch() method.
    self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
    # The alternative that covers the whole subject must be chosen.
    for subject in "ab", S("ab"):
        self.assertEqual(re.fullmatch(r"a|ab", subject).span(), (0, 2))
    for subject in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
        self.assertEqual(re.fullmatch(br"a|ab", subject).span(), (0, 2))
    for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
        r = r"%s|%s" % (a, a + b)
        self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))

    # Non-greedy repeats must still stretch over the whole subject.
    for pattern, subject, span in [
        (r".*?$", "abc", (0, 3)),
        (r".*?", "abc", (0, 3)),
        (r"a.*?b", "ab", (0, 2)),
        (r"a.*?b", "abb", (0, 3)),
        (r"a.*?b", "axxb", (0, 4)),
    ]:
        self.assertEqual(re.fullmatch(pattern, subject).span(), span)

    # Nothing may be left over after the match, not even a newline.
    for pattern, subject in [
        (r"a+", "ab"),
        (r"abc$", "abc\n"),
        (r"abc\Z", "abc\n"),
        (r"(?m)abc$", "abc\n"),
    ]:
        self.assertIsNone(re.fullmatch(pattern, subject))

    # Lookahead and lookbehind assertions.
    for pattern, subject, span in [
        (r"ab(?=c)cd", "abcd", (0, 4)),
        (r"ab(?<=b)cd", "abcd", (0, 4)),
        (r"(?=a|ab)ab", "ab", (0, 2)),
    ]:
        self.assertEqual(re.fullmatch(pattern, subject).span(), span)

    # pos/endpos restrict the region that has to be matched in full.
    for pattern in (r"bc", r".*?$", r".*?"):
        self.assertEqual(
            re.compile(pattern).fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
520
def test_re_groupref_exists(self):
    # A closing parenthesis is required exactly when an opening one
    # was matched.
    balanced = r'^(\()?([^()]+)(?(1)\))$'
    self.assertEqual(re.match(balanced, '(a)').groups(),
                     ('(', 'a'))
    self.assertEqual(re.match(balanced, 'a').groups(),
                     (None, 'a'))
    self.assertIsNone(re.match(balanced, 'a)'))
    self.assertIsNone(re.match(balanced, '(a'))

    # Conditionals with a yes-branch and a no-branch; the yes-branch
    # may be empty.
    for pattern, subject, groups in [
        ('^(?:(a)|c)((?(1)b|d))$', 'ab', ('a', 'b')),
        (r'^(?:(a)|c)((?(1)b|d))$', 'cd', (None, 'd')),
        (r'^(?:(a)|c)((?(1)|d))$', 'cd', (None, 'd')),
        (r'^(?:(a)|c)((?(1)|d))$', 'a', ('a', '')),
    ]:
        self.assertEqual(re.match(pattern, subject).groups(), groups)

    # Tests for bug #1177831: exercise groups other than the first group
    p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
    self.assertEqual(p.match('abc').groups(),
                     ('a', 'b', 'c'))
    self.assertEqual(p.match('ad').groups(),
                     ('a', None, 'd'))
    for unmatched in ('abd', 'ac'):
        self.assertIsNone(p.match(unmatched))

    # Support > 100 groups.
    alternatives = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
    pat = '(?:%s)(?(200)z)' % alternatives
    self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

    # Malformed conditionals.
    self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
    self.checkPatternError(r'()(?(1)a|b',
                           'missing ), unterminated subpattern', 2)
    self.checkPatternError(r'()(?(1)a|b|c)',
                           'conditional backref with more than '
                           'two branches', 10)
557
def test_re_groupref_overflow(self):
    # A reference to group number MAXGROUPS has to be reported as an
    # invalid group reference, both in a replacement template and in a
    # conditional group reference inside a pattern.
    from sre_constants import MAXGROUPS
    expected_msg = 'invalid group reference %d' % MAXGROUPS
    template = r'\g<%s>' % MAXGROUPS
    self.checkTemplateError('()', template, 'xx', expected_msg, 3)
    conditional = r'(?P<a>)(?(%d))' % MAXGROUPS
    self.checkPatternError(conditional, expected_msg, 10)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200564
def test_re_groupref(self):
    # Numeric backreferences (\1) to a group that is optional or that
    # sits in one branch of an alternation.
    required_ref = r'^(\|)?([^()]+)\1$'
    found = re.match(required_ref, '|a|')
    self.assertEqual(found.groups(), ('|', 'a'))
    found = re.match(r'^(\|)?([^()]+)\1?$', 'a')
    self.assertEqual(found.groups(), (None, 'a'))
    # Only one of the two bars is present, so \1 cannot be satisfied.
    for text in ('a|', '|a'):
        self.assertIsNone(re.match(required_ref, text))
    found = re.match(r'^(?:(a)|c)(\1)$', 'aa')
    self.assertEqual(found.groups(), ('a', 'a'))
    found = re.match(r'^(?:(a)|c)(\1)?$', 'c')
    self.assertEqual(found.groups(), (None, None))

    # A group may not refer to itself while it is still open.
    self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)
578
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000579 def test_groupdict(self):
580 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
581 'first second').groupdict(),
582 {'first':'first', 'second':'second'})
583
584 def test_expand(self):
585 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
586 "first second")
587 .expand(r"\2 \1 \g<second> \g<first>"),
588 "second first second first")
Serhiy Storchaka7438e4b2014-10-10 11:06:31 +0300589 self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
590 "first")
591 .expand(r"\2 \g<second>"),
592 " ")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000593
def test_repeat_minmax(self):
    # Bounded repetition {m}, {m,n} and the lazy forms {m}?, {m,n}?,
    # applied to a capturing group and to a single literal.

    # Upper bound too small for the three-character subject.
    for pattern in (r"^(\w){1}$", r"^(\w){1}?$",
                    r"^(\w){1,2}$", r"^(\w){1,2}?$"):
        self.assertIsNone(re.match(pattern, "abc"))

    # Bounds that admit exactly three repetitions: the group keeps the
    # last character it matched.  Greedy forms first, then lazy forms.
    for pattern in (r"^(\w){3}$", r"^(\w){1,3}$",
                    r"^(\w){1,4}$", r"^(\w){3,4}$",
                    r"^(\w){3}?$", r"^(\w){1,3}?$",
                    r"^(\w){1,4}?$", r"^(\w){3,4}?$"):
        self.assertEqual(re.match(pattern, "abc").group(1), "c")

    # Same checks with a plain literal instead of a group.
    for pattern in (r"^x{1}$", r"^x{1}?$", r"^x{1,2}$", r"^x{1,2}?$"):
        self.assertIsNone(re.match(pattern, "xxx"))

    for pattern in (r"^x{3}$", r"^x{1,3}$", r"^x{3,3}$",
                    r"^x{1,4}$", r"^x{3,4}$",
                    r"^x{3}?$", r"^x{1,3}?$",
                    r"^x{1,4}?$", r"^x{3,4}?$"):
        self.assertTrue(re.match(pattern, "xxx"))

    # "{}" without numbers is not a repeat; it matches literally.
    self.assertIsNone(re.match(r"^x{}$", "xxx"))
    self.assertTrue(re.match(r"^x{}$", "x{}"))

    self.checkPatternError(r'x{2,1}',
                           'min repeat greater than max repeat', 2)
629
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000630 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000631 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000632 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000633 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
634 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
635 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
636 {'first': 1, 'other': 2})
637
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000638 self.assertEqual(re.match("(a)", "a").pos, 0)
639 self.assertEqual(re.match("(a)", "a").endpos, 1)
640 self.assertEqual(re.match("(a)", "a").string, "a")
641 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300642 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000643
Serhiy Storchaka07360df2015-03-30 01:01:48 +0300644 # Issue 14260. groupindex should be non-modifiable mapping.
645 p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
646 self.assertEqual(sorted(p.groupindex), ['first', 'other'])
647 self.assertEqual(p.groupindex['other'], 2)
648 with self.assertRaises(TypeError):
649 p.groupindex['other'] = 0
650 self.assertEqual(p.groupindex['other'], 2)
651
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000652 def test_special_escapes(self):
653 self.assertEqual(re.search(r"\b(b.)\b",
654 "abcd abc bcd bx").group(1), "bx")
655 self.assertEqual(re.search(r"\B(b.)\B",
656 "abc bcd bc abxd").group(1), "bx")
657 self.assertEqual(re.search(r"\b(b.)\b",
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300658 "abcd abc bcd bx", re.ASCII).group(1), "bx")
659 self.assertEqual(re.search(r"\B(b.)\B",
660 "abc bcd bc abxd", re.ASCII).group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000661 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
662 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300663 self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300664 self.assertEqual(re.search(br"\b(b.)\b",
665 b"abcd abc bcd bx").group(1), b"bx")
666 self.assertEqual(re.search(br"\B(b.)\B",
667 b"abc bcd bc abxd").group(1), b"bx")
668 self.assertEqual(re.search(br"\b(b.)\b",
669 b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
670 self.assertEqual(re.search(br"\B(b.)\B",
671 b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
672 self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
673 self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300674 self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000675 self.assertEqual(re.search(r"\d\D\w\W\s\S",
676 "1aa! a").group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300677 self.assertEqual(re.search(br"\d\D\w\W\s\S",
678 b"1aa! a").group(0), b"1aa! a")
679 self.assertEqual(re.search(r"\d\D\w\W\s\S",
680 "1aa! a", re.ASCII).group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300681 self.assertEqual(re.search(br"\d\D\w\W\s\S",
682 b"1aa! a", re.LOCALE).group(0), b"1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000683
def test_other_escapes(self):
    # Backslash-escaped punctuation matches literally; a backslash
    # followed by one of the listed ASCII letters is a compile error.
    self.checkPatternError("\\", 'bad escape (end of pattern)', 0)
    # (pattern, text it matches in full, text it must reject or None)
    literal_cases = [
        (r"\(", '(', ')'),
        (r"\\", '\\', None),
        (r"[\]]", ']', '['),
        (r"[a\-c]", '-', 'b'),
        (r"[\^a]+", 'a^', 'b'),
    ]
    for pattern, accepted, rejected in literal_cases:
        self.assertEqual(re.match(pattern, accepted).group(), accepted)
        if rejected is not None:
            self.assertIsNone(re.match(pattern, rejected))
    re.purge()  # for warnings
    # Letter escapes rejected outside a character class ...
    for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
        with self.subTest(c):
            with self.assertRaises(re.error):
                re.compile('\\%c' % c)
    # ... and inside one.
    for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
        with self.subTest(c):
            with self.assertRaises(re.error):
                re.compile('[\\%c]' % c)
Serhiy Storchakab99c1322014-11-10 14:38:16 +0200702
Ezio Melotti5a045b92012-02-29 11:48:44 +0200703 def test_string_boundaries(self):
704 # See http://bugs.python.org/issue10713
705 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
706 "abc")
707 # There's a word boundary at the start of a string.
708 self.assertTrue(re.match(r"\b", "abc"))
709 # A non-empty string includes a non-boundary zero-length match.
710 self.assertTrue(re.search(r"\B", "abc"))
711 # There is no non-boundary match at the start of a string.
712 self.assertFalse(re.match(r"\B", "abc"))
713 # However, an empty string contains no word boundaries, and also no
714 # non-boundaries.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300715 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200716 # This one is questionable and different from the perlre behaviour,
717 # but describes current behavior.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300718 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200719 # A single word-character string has two boundaries, but no
720 # non-boundary gaps.
721 self.assertEqual(len(re.findall(r"\b", "a")), 2)
722 self.assertEqual(len(re.findall(r"\B", "a")), 0)
723 # If there are no words, there are no boundaries
724 self.assertEqual(len(re.findall(r"\b", " ")), 0)
725 self.assertEqual(len(re.findall(r"\b", " ")), 0)
726 # Can match around the whitespace.
727 self.assertEqual(len(re.findall(r"\B", " ")), 2)
728
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000729 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000730 self.assertEqual(re.match("([\u2222\u2223])",
731 "\u2222").group(1), "\u2222")
Serhiy Storchakabe80fc92013-10-24 22:02:58 +0300732 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300733 self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000734
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100735 def test_big_codesize(self):
736 # Issue #1160
737 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300738 self.assertTrue(r.match('1000'))
739 self.assertTrue(r.match('9999'))
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100740
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000741 def test_anyall(self):
742 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
743 "a\nb")
744 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
745 "a\n\nb")
746
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200747 def test_lookahead(self):
R David Murray44b548d2016-09-08 13:59:53 -0400748 self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a")
749 self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a")
750 self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a")
751 self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000752 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
753 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
754 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
755
756 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
757 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
758 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
759 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
760
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200761 # Group reference.
762 self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
763 self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
764 # Conditional group reference.
765 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
766 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
767 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
768 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
769 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
770 # Group used before defined.
771 self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
772 self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
773 self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
774
775 def test_lookbehind(self):
776 self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
777 self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
778 self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
779 self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
780 # Group reference.
781 self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
782 self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
783 self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
784 self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
785 # Conditional group reference.
786 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
787 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
788 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
789 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
790 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
791 # Group used before defined.
792 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
793 self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
794 self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
795 # Group defined in the same lookbehind pattern
796 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
797 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
798 self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
799 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
800
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000801 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000802 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300803 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000804 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
805 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
806 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
807 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
808 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
809 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
810 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
811 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
812
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200813 assert '\u212a'.lower() == 'k' # 'K'
814 self.assertTrue(re.match(r'K', '\u212a', re.I))
815 self.assertTrue(re.match(r'k', '\u212a', re.I))
816 self.assertTrue(re.match(r'\u212a', 'K', re.I))
817 self.assertTrue(re.match(r'\u212a', 'k', re.I))
818 assert '\u017f'.upper() == 'S' # 'ſ'
819 self.assertTrue(re.match(r'S', '\u017f', re.I))
820 self.assertTrue(re.match(r's', '\u017f', re.I))
821 self.assertTrue(re.match(r'\u017f', 'S', re.I))
822 self.assertTrue(re.match(r'\u017f', 's', re.I))
823 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
824 self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
825 self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
826
827 def test_ignore_case_set(self):
828 self.assertTrue(re.match(r'[19A]', 'A', re.I))
829 self.assertTrue(re.match(r'[19a]', 'a', re.I))
830 self.assertTrue(re.match(r'[19a]', 'A', re.I))
831 self.assertTrue(re.match(r'[19A]', 'a', re.I))
832 self.assertTrue(re.match(br'[19A]', b'A', re.I))
833 self.assertTrue(re.match(br'[19a]', b'a', re.I))
834 self.assertTrue(re.match(br'[19a]', b'A', re.I))
835 self.assertTrue(re.match(br'[19A]', b'a', re.I))
836 assert '\u212a'.lower() == 'k' # 'K'
837 self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
838 self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
839 self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
840 self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
841 assert '\u017f'.upper() == 'S' # 'ſ'
842 self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
843 self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
844 self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
845 self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
846 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
847 self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
848 self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
849
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200850 def test_ignore_case_range(self):
851 # Issues #3511, #17381.
852 self.assertTrue(re.match(r'[9-a]', '_', re.I))
853 self.assertIsNone(re.match(r'[9-A]', '_', re.I))
854 self.assertTrue(re.match(br'[9-a]', b'_', re.I))
855 self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
856 self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
857 self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
858 self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))
859 self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
860 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
861 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
862 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
863 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
864 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
865 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
866 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
867 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
868
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200869 assert '\u212a'.lower() == 'k' # 'K'
870 self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
871 self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
872 self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
873 self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
874 assert '\u017f'.upper() == 'S' # 'ſ'
875 self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
876 self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
877 self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
878 self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
879 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
880 self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
881 self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
882
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000883 def test_category(self):
884 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
885
886 def test_getlower(self):
887 import _sre
888 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
889 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
890 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200891 self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000892
893 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300894 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200895 self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
896 self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000897
898 def test_not_literal(self):
R David Murray44b548d2016-09-08 13:59:53 -0400899 self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
900 self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000901
902 def test_search_coverage(self):
R David Murray44b548d2016-09-08 13:59:53 -0400903 self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
904 self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000905
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.fullmatch):
    # Assert that matcher(pattern, text) succeeds and that the matched
    # text and its span are the expected ones.
    #
    # When neither `match` nor `span` is given, the pattern is expected
    # to match the whole of `text`.  Giving only one of the two raises
    # ValueError.
    omitted = (match is None, span is None)
    if all(omitted):
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    elif any(omitted):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000919
# ASCII letters, digits and some punctuation; the escape tests assert
# that re.escape() returns this string unchanged.
LITERAL_CHARS = ''.join([string.ascii_letters, string.digits,
                         '!"%&\',/:;<=>@_`~'])
921
def test_re_escape(self):
    # re.escape() on str: every one of the first 256 code points, once
    # escaped, must match itself on its own, inside a set, and in
    # verbose mode.
    all_chars = ''.join(map(chr, range(256)))
    for char in all_chars:
        escaped = re.escape(char)
        for pattern in (escaped, '[' + escaped + ']', '(?x)' + escaped):
            self.assertMatch(pattern, char)
    self.assertMatch(re.escape(all_chars), all_chars)
    # These characters must come back with a leading backslash.
    for char in '-.]{}':
        self.assertEqual(re.escape(char)[:1], '\\')
    # The literal characters must come back unchanged.
    unescaped = self.LITERAL_CHARS
    self.assertEqual(re.escape(unescaped), unescaped)
Guido van Rossum49946571997-07-18 04:26:25 +0000933
def test_re_escape_bytes(self):
    # re.escape() on bytes: every byte value, once escaped, must match
    # itself on its own, inside a set, and in verbose mode.
    all_bytes = bytes(range(256))
    for value in all_bytes:
        single = bytes([value])
        escaped = re.escape(single)
        for pattern in (escaped, b'[' + escaped + b']', b'(?x)' + escaped):
            self.assertMatch(pattern, single)
    self.assertMatch(re.escape(all_bytes), all_bytes)
    # These bytes must come back with a leading backslash.
    for value in b'-.]{}':
        single = bytes([value])
        self.assertEqual(re.escape(single)[:1], b'\\')
    # The literal characters must come back unchanged.
    unescaped = self.LITERAL_CHARS.encode('ascii')
    self.assertEqual(re.escape(unescaped), unescaped)
Guido van Rossum698280d2008-09-10 17:44:35 +0000947
def test_re_escape_non_ascii(self):
    # Non-ASCII characters are expected to pass through re.escape()
    # unchanged and still work as part of a larger pattern.
    text = 'xxx\u2620\u2620\u2620xxx'
    escaped_text = re.escape(text)
    self.assertEqual(escaped_text, text)
    self.assertMatch(escaped_text, text)
    repeated = '.%s+.' % re.escape('\u2620')
    self.assertMatch(repeated, text,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)
955
def test_re_escape_non_ascii_bytes(self):
    # UTF-8 encoded non-ASCII text is expected to pass through
    # re.escape() unchanged.
    data = 'y\u2620y\u2620y'.encode('utf-8')
    escaped_data = re.escape(data)
    self.assertEqual(escaped_data, data)
    self.assertMatch(escaped_data, data)
    # The encoded character occurs twice in the data.
    encoded_char = '\u2620'.encode('utf-8')
    occurrences = re.findall(re.escape(encoded_char), data)
    self.assertEqual(len(occurrences), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000963
Serhiy Storchakab85a9762014-09-15 11:33:19 +0300964 def test_pickling(self):
965 import pickle
966 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
967 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
968 pickled = pickle.dumps(oldpat, proto)
969 newpat = pickle.loads(pickled)
970 self.assertEqual(newpat, oldpat)
971 # current pickle expects the _compile() reconstructor in re module
972 from re import _compile
Guido van Rossum23b22571997-07-17 22:36:14 +0000973
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000974 def test_constants(self):
975 self.assertEqual(re.I, re.IGNORECASE)
976 self.assertEqual(re.L, re.LOCALE)
977 self.assertEqual(re.M, re.MULTILINE)
978 self.assertEqual(re.S, re.DOTALL)
979 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000980
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000981 def test_flags(self):
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200982 for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300983 self.assertTrue(re.compile('^pattern$', flag))
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200984 for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
985 self.assertTrue(re.compile(b'^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +0000986
def test_sre_character_literals(self):
    # Octal, \x, \u and \U escapes in str patterns, outside a set.
    # Each entry is (pattern template, extra literal the pattern ends
    # with and the subject therefore needs after chr(i)).
    eight_bit = [(r"\%03o", ""), (r"\%03o0", "0"), (r"\%03o8", "8"),
                 (r"\x%02x", ""), (r"\x%02x0", "0"), (r"\x%02xz", "z")]
    sixteen_bit = [(r"\u%04x", ""), (r"\u%04x0", "0"), (r"\u%04xz", "z")]
    any_width = [(r"\U%08x", ""), (r"\U%08x0", "0"), (r"\U%08xz", "z")]
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        templates = []
        if i < 256:
            templates.extend(eight_bit)
        if i < 0x10000:
            templates.extend(sixteen_bit)
        templates.extend(any_width)
        for template, tail in templates:
            self.assertTrue(re.match(template % i, chr(i) + tail))

    # Short octal escapes, with and without a following non-octal digit.
    for pattern, text in ((r"\0", "\000"), (r"\08", "\0008"),
                          (r"\01", "\001"), (r"\018", "\0018")):
        self.assertTrue(re.match(pattern, text))

    # (pattern, expected error message, expected error position)
    error_cases = [
        (r"\567", r'octal escape value \567 outside of '
                  r'range 0-0o377', 0),
        (r"\911", 'invalid group reference 91', 1),
        (r"\x1", r'incomplete escape \x1', 0),
        (r"\x1z", r'incomplete escape \x1', 0),
        (r"\u123", r'incomplete escape \u123', 0),
        (r"\u123z", r'incomplete escape \u123', 0),
        (r"\U0001234", r'incomplete escape \U0001234', 0),
        (r"\U0001234z", r'incomplete escape \U0001234', 0),
        (r"\U00110000", r'bad escape \U00110000', 0),
    ]
    for pattern, message, position in error_cases:
        self.checkPatternError(pattern, message, position)
Skip Montanaro7d9963f2003-04-25 14:12:40 +00001018
def test_sre_character_class_literals(self):
    # Octal, \x, \u and \U escapes in str patterns, inside a set.
    eight_bit = [r"[\%o]", r"[\%o8]", r"[\%03o]", r"[\%03o0]",
                 r"[\%03o8]", r"[\x%02x]", r"[\x%02x0]", r"[\x%02xz]"]
    sixteen_bit = [r"[\u%04x]", r"[\u%04x0]", r"[\u%04xz]"]
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        char = chr(i)
        templates = []
        if i < 256:
            templates.extend(eight_bit)
        if i < 0x10000:
            templates.extend(sixteen_bit)
        templates.append(r"[\U%08x]")
        for template in templates:
            self.assertTrue(re.match(template % i, char))
        # These two subjects carry a trailing character; match() only
        # has to consume the first one.
        self.assertTrue(re.match(r"[\U%08x0]" % i, char+"0"))
        self.assertTrue(re.match(r"[\U%08xz]" % i, char+"z"))

    # (pattern, expected error message, expected error position)
    error_cases = [
        (r"[\567]", r'octal escape value \567 outside of '
                    r'range 0-0o377', 1),
        (r"[\911]", r'bad escape \9', 1),
        (r"[\x1z]", r'incomplete escape \x1', 1),
        (r"[\u123z]", r'incomplete escape \u123', 1),
        (r"[\U0001234z]", r'incomplete escape \U0001234', 1),
        (r"[\U00110000]", r'bad escape \U00110000', 1),
    ]
    for pattern, message, position in error_cases:
        self.checkPatternError(pattern, message, position)
    # A range whose endpoints are both written as \U escapes.
    self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +02001046
def test_sre_byte_literals(self):
    # Octal and \x escapes in bytes patterns, outside a set.
    # Each entry is (pattern template, extra literal the pattern ends
    # with and the subject therefore needs after the byte).
    templates = [(r"\%03o", b""), (r"\%03o0", b"0"), (r"\%03o8", b"8"),
                 (r"\x%02x", b""), (r"\x%02x0", b"0"), (r"\x%02xz", b"z")]
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        for template, tail in templates:
            pattern = (template % i).encode()
            self.assertTrue(re.match(pattern, bytes([i]) + tail))
    # \u and \U escapes are rejected in bytes patterns.
    for pattern in (br"\u1234", br"\U00012345"):
        with self.assertRaises(re.error):
            re.compile(pattern)
    # Short octal escapes, with and without a following non-octal digit.
    for pattern, data in ((br"\0", b"\000"), (br"\08", b"\0008"),
                          (br"\01", b"\001"), (br"\018", b"\0018")):
        self.assertTrue(re.match(pattern, data))
    # (pattern, expected error message, expected error position)
    error_cases = [
        (br"\567", r'octal escape value \567 outside of '
                   r'range 0-0o377', 0),
        (br"\911", 'invalid group reference 91', 1),
        (br"\x1", r'incomplete escape \x1', 0),
        (br"\x1z", r'incomplete escape \x1', 0),
    ]
    for pattern, message, position in error_cases:
        self.checkPatternError(pattern, message, position)
Antoine Pitrou463badf2012-06-23 13:29:19 +02001067
def test_sre_byte_class_literals(self):
    # Octal and \x escapes in bytes patterns, inside a set.
    templates = [r"[\%o]", r"[\%o8]", r"[\%03o]", r"[\%03o0]",
                 r"[\%03o8]", r"[\x%02x]", r"[\x%02x0]", r"[\x%02xz]"]
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        single = bytes([i])
        for template in templates:
            self.assertTrue(re.match((template % i).encode(), single))
    # \u and \U escapes are rejected in bytes patterns.
    for pattern in (br"[\u1234]", br"[\U00012345]"):
        with self.assertRaises(re.error):
            re.compile(pattern)
    # (pattern, expected error message, expected error position)
    error_cases = [
        (br"[\567]", r'octal escape value \567 outside of '
                     r'range 0-0o377', 1),
        (br"[\911]", r'bad escape \9', 1),
        (br"[\x1z]", r'incomplete escape \x1', 1),
    ]
    for pattern, message, position in error_cases:
        self.checkPatternError(pattern, message, position)
1085
def test_character_set_errors(self):
    # Every malformed character set must raise re.error with the expected
    # message and error position.
    self.checkPatternError(r'[', 'unterminated character set', 0)
    self.checkPatternError(r'[^', 'unterminated character set', 0)
    self.checkPatternError(r'[a', 'unterminated character set', 0)
    # bug 545855 -- This pattern failed to cause a compile error as it
    # should, instead provoking a TypeError.
    self.checkPatternError(r"[a-", 'unterminated character set', 0)
    # Ranges whose endpoint is a category escape, or that run backwards.
    self.checkPatternError(r"[\w-b]", r'bad character range \w-b', 1)
    self.checkPatternError(r"[a-\w]", r'bad character range a-\w', 1)
    self.checkPatternError(r"[b-a]", 'bad character range b-a', 1)
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +00001096
Skip Montanaro7d9963f2003-04-25 14:12:40 +00001097 def test_bug_113254(self):
1098 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
1099 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
1100 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
1101
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001102 def test_bug_527371(self):
1103 # bug described in patches 527371/672491
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001104 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001105 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
1106 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
R David Murray44b548d2016-09-08 13:59:53 -04001107 self.assertEqual(re.match(r"(?P<a>a(b))", "ab").lastgroup, 'a')
1108 self.assertEqual(re.match(r"((a))", "a").lastindex, 1)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001109
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001110 def test_bug_418626(self):
1111 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
1112 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
1113 # pattern '*?' on a long string.
1114 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
1115 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
1116 20003)
1117 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001118 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +00001119 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001120 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001121
1122 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001123 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +00001124 self.assertEqual(re.compile(pat) and 1, 1)
1125
Skip Montanaro1e703c62003-04-25 15:40:28 +00001126 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001127 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +00001128 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001129 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
1130 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
1131 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +00001132
def test_nothing_to_repeat(self):
    # A repeat operator (greedy or lazy) with no preceding item must be
    # rejected, both at the start of the pattern and at the start of a
    # non-capturing group.
    for reps in '*', '+', '?', '{1,2}':
        for mod in '', '?':
            self.checkPatternError('%s%s' % (reps, mod),
                                   'nothing to repeat', 0)
            self.checkPatternError('(?:%s%s)' % (reps, mod),
                                   'nothing to repeat', 3)
1140
def test_multiple_repeat(self):
    # Stacking one repeat operator directly on another must be rejected.
    # The expected error position is just past 'x' and the inner operator.
    for outer_reps in '*', '+', '{1,2}':
        for outer_mod in '', '?':
            outer_op = outer_reps + outer_mod
            for inner_reps in '*', '+', '?', '{1,2}':
                for inner_mod in '', '?':
                    inner_op = inner_reps + inner_mod
                    self.checkPatternError(r'x%s%s' % (inner_op, outer_op),
                                           'multiple repeat', 1 + len(inner_op))
1150
Serhiy Storchakafa468162013-02-16 21:23:53 +02001151 def test_unlimited_zero_width_repeat(self):
1152 # Issue #9669
1153 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
1154 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
1155 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
1156 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
1157 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
1158 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
1159
Skip Montanaro1e703c62003-04-25 15:40:28 +00001160 def test_scanner(self):
1161 def s_ident(scanner, token): return token
1162 def s_operator(scanner, token): return "op%s" % token
1163 def s_float(scanner, token): return float(token)
1164 def s_int(scanner, token): return int(token)
1165
1166 scanner = Scanner([
1167 (r"[a-zA-Z_]\w*", s_ident),
1168 (r"\d+\.\d*", s_float),
1169 (r"\d+", s_int),
1170 (r"=|\+|-|\*|/", s_operator),
1171 (r"\s+", None),
1172 ])
1173
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001174 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +00001175
Skip Montanaro1e703c62003-04-25 15:40:28 +00001176 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
1177 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
1178 'op+', 'bar'], ''))
1179
Skip Montanaro5ba00542003-04-25 16:00:14 +00001180 def test_bug_448951(self):
1181 # bug 448951 (similar to 429357, but with single char match)
1182 # (Also test greedy matches.)
1183 for op in '','?','*':
1184 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
1185 (None, None))
1186 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
1187 ('a:', 'a'))
1188
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001189 def test_bug_725106(self):
1190 # capturing groups in alternatives in repeats
1191 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
1192 ('b', 'a'))
1193 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
1194 ('c', 'b'))
1195 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
1196 ('b', None))
1197 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
1198 ('b', None))
1199 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
1200 ('b', 'a'))
1201 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
1202 ('c', 'b'))
1203 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
1204 ('b', None))
1205 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
1206 ('b', None))
1207
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +00001208 def test_bug_725149(self):
1209 # mark_stack_base restoring before restoring marks
1210 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
1211 ('a', None))
1212 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
1213 ('a', None, None))
1214
Just van Rossum12723ba2003-07-02 20:03:04 +00001215 def test_bug_764548(self):
1216 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001217 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +00001218 pat = re.compile(my_unicode("abc"))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001219 self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +00001220
Skip Montanaro5ba00542003-04-25 16:00:14 +00001221 def test_finditer(self):
1222 iter = re.finditer(r":+", "a:b::c:::d")
1223 self.assertEqual([item.group(0) for item in iter],
1224 [":", "::", ":::"])
1225
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001226 pat = re.compile(r":+")
1227 iter = pat.finditer("a:b::c:::d", 1, 10)
1228 self.assertEqual([item.group(0) for item in iter],
1229 [":", "::", ":::"])
1230
1231 pat = re.compile(r":+")
1232 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
1233 self.assertEqual([item.group(0) for item in iter],
1234 [":", "::", ":::"])
1235
1236 pat = re.compile(r":+")
1237 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
1238 self.assertEqual([item.group(0) for item in iter],
1239 [":", "::", ":::"])
1240
1241 pat = re.compile(r":+")
1242 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
1243 self.assertEqual([item.group(0) for item in iter],
1244 ["::", "::"])
1245
Thomas Wouters40a088d2008-03-18 20:19:54 +00001246 def test_bug_926075(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001247 self.assertIsNot(re.compile('bug_926075'),
1248 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +00001249
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001250 def test_bug_931848(self):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001251 pattern = "[\u002E\u3002\uFF0E\uFF61]"
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001252 self.assertEqual(re.compile(pattern).split("a.b.c"),
1253 ['a','b','c'])
1254
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001255 def test_bug_581080(self):
1256 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +00001257 self.assertEqual(next(iter).span(), (1,2))
1258 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001259
1260 scanner = re.compile(r"\s").scanner("a b")
1261 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001262 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001263
1264 def test_bug_817234(self):
1265 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +00001266 self.assertEqual(next(iter).span(), (0, 4))
1267 self.assertEqual(next(iter).span(), (4, 4))
1268 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001269
Mark Dickinson1f268282009-07-28 17:22:36 +00001270 def test_bug_6561(self):
1271 # '\d' should match characters in Unicode category 'Nd'
1272 # (Number, Decimal Digit), but not those in 'Nl' (Number,
1273 # Letter) or 'No' (Number, Other).
1274 decimal_digits = [
1275 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
1276 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
1277 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
1278 ]
1279 for x in decimal_digits:
R David Murray44b548d2016-09-08 13:59:53 -04001280 self.assertEqual(re.match(r'^\d$', x).group(0), x)
Mark Dickinson1f268282009-07-28 17:22:36 +00001281
1282 not_decimal_digits = [
1283 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
1284 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
1285 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
1286 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
1287 ]
1288 for x in not_decimal_digits:
R David Murray44b548d2016-09-08 13:59:53 -04001289 self.assertIsNone(re.match(r'^\d$', x))
Mark Dickinson1f268282009-07-28 17:22:36 +00001290
Guido van Rossumd8faa362007-04-27 19:54:29 +00001291 def test_empty_array(self):
1292 # SF buf 1647541
1293 import array
Guido van Rossum166746c2007-07-03 15:39:16 +00001294 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +00001295 a = array.array(typecode)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001296 self.assertIsNone(re.compile(b"bla").match(a))
Antoine Pitroufd036452008-08-19 17:56:33 +00001297 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001298
Christian Heimes072c0f12008-01-03 23:01:04 +00001299 def test_inline_flags(self):
1300 # Bug #1700
Serhiy Storchakaab140882014-11-11 21:13:28 +02001301 upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
1302 lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
Christian Heimes072c0f12008-01-03 23:01:04 +00001303
1304 p = re.compile(upper_char, re.I | re.U)
1305 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001306 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001307
1308 p = re.compile(lower_char, re.I | re.U)
1309 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001310 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001311
1312 p = re.compile('(?i)' + upper_char, re.U)
1313 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001314 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001315
1316 p = re.compile('(?i)' + lower_char, re.U)
1317 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001318 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001319
1320 p = re.compile('(?iu)' + upper_char)
1321 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001322 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001323
1324 p = re.compile('(?iu)' + lower_char)
1325 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001326 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001327
Serhiy Storchakad65cd092016-09-11 01:39:01 +03001328 self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
1329 self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
1330
Serhiy Storchakaabf275a2016-09-17 01:29:58 +03001331 p = upper_char + '(?i)'
1332 with self.assertWarns(DeprecationWarning) as warns:
1333 self.assertTrue(re.match(p, lower_char))
1334 self.assertEqual(
1335 str(warns.warnings[0].message),
1336 'Flags not at the start of the expression %s' % p
1337 )
1338
1339 p = upper_char + '(?i)%s' % ('.?' * 100)
1340 with self.assertWarns(DeprecationWarning) as warns:
1341 self.assertTrue(re.match(p, lower_char))
1342 self.assertEqual(
1343 str(warns.warnings[0].message),
1344 'Flags not at the start of the expression %s (truncated)' % p[:20]
1345 )
Serhiy Storchakabd48d272016-09-11 12:50:02 +03001346
Christian Heimes25bb7832008-01-11 16:17:00 +00001347 def test_dollar_matches_twice(self):
1348 "$ matches the end of string, and just before the terminating \n"
1349 pattern = re.compile('$')
1350 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
1351 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
1352 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1353
1354 pattern = re.compile('$', re.MULTILINE)
1355 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
1356 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
1357 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1358
Antoine Pitroufd036452008-08-19 17:56:33 +00001359 def test_bytes_str_mixing(self):
1360 # Mixing str and bytes is disallowed
1361 pat = re.compile('.')
1362 bpat = re.compile(b'.')
1363 self.assertRaises(TypeError, pat.match, b'b')
1364 self.assertRaises(TypeError, bpat.match, 'b')
1365 self.assertRaises(TypeError, pat.sub, b'b', 'c')
1366 self.assertRaises(TypeError, pat.sub, 'b', b'c')
1367 self.assertRaises(TypeError, pat.sub, b'b', b'c')
1368 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
1369 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
1370 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
1371
1372 def test_ascii_and_unicode_flag(self):
1373 # String patterns
1374 for flags in (0, re.UNICODE):
1375 pat = re.compile('\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001376 self.assertTrue(pat.match('\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001377 pat = re.compile(r'\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001378 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001379 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001380 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001381 pat = re.compile('(?a)\xc0', re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001382 self.assertIsNone(pat.match('\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001383 pat = re.compile(r'\w', re.ASCII)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001384 self.assertIsNone(pat.match('\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001385 pat = re.compile(r'(?a)\w')
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001386 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001387 # Bytes patterns
1388 for flags in (0, re.ASCII):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001389 pat = re.compile(b'\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001390 self.assertIsNone(pat.match(b'\xe0'))
R David Murray44b548d2016-09-08 13:59:53 -04001391 pat = re.compile(br'\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001392 self.assertIsNone(pat.match(b'\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001393 # Incompatibilities
R David Murray44b548d2016-09-08 13:59:53 -04001394 self.assertRaises(ValueError, re.compile, br'\w', re.UNICODE)
1395 self.assertRaises(ValueError, re.compile, br'(?u)\w')
1396 self.assertRaises(ValueError, re.compile, r'\w', re.UNICODE | re.ASCII)
1397 self.assertRaises(ValueError, re.compile, r'(?u)\w', re.ASCII)
1398 self.assertRaises(ValueError, re.compile, r'(?a)\w', re.UNICODE)
1399 self.assertRaises(ValueError, re.compile, r'(?au)\w')
Antoine Pitroufd036452008-08-19 17:56:33 +00001400
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001401 def test_locale_flag(self):
1402 import locale
Benjamin Peterson21a74312017-03-07 22:48:09 -08001403 _, enc = locale.getlocale(locale.LC_CTYPE)
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001404 # Search non-ASCII letter
1405 for i in range(128, 256):
1406 try:
1407 c = bytes([i]).decode(enc)
1408 sletter = c.lower()
1409 if sletter == c: continue
1410 bletter = sletter.encode(enc)
1411 if len(bletter) != 1: continue
1412 if bletter.decode(enc) != sletter: continue
1413 bpat = re.escape(bytes([i]))
1414 break
1415 except (UnicodeError, TypeError):
1416 pass
Benjamin Peterson1e687162017-03-01 21:53:00 -08001417 else:
1418 bletter = None
1419 bpat = b'A'
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001420 # Bytes patterns
1421 pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
1422 if bletter:
1423 self.assertTrue(pat.match(bletter))
1424 pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
1425 if bletter:
1426 self.assertTrue(pat.match(bletter))
1427 pat = re.compile(bpat, re.IGNORECASE)
1428 if bletter:
1429 self.assertIsNone(pat.match(bletter))
R David Murray44b548d2016-09-08 13:59:53 -04001430 pat = re.compile(br'\w', re.LOCALE)
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001431 if bletter:
1432 self.assertTrue(pat.match(bletter))
R David Murray44b548d2016-09-08 13:59:53 -04001433 pat = re.compile(br'(?L)\w')
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001434 if bletter:
1435 self.assertTrue(pat.match(bletter))
R David Murray44b548d2016-09-08 13:59:53 -04001436 pat = re.compile(br'\w')
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001437 if bletter:
1438 self.assertIsNone(pat.match(bletter))
1439 # Incompatibilities
Serhiy Storchaka9bd85b82016-06-11 19:15:00 +03001440 self.assertRaises(ValueError, re.compile, '', re.LOCALE)
1441 self.assertRaises(ValueError, re.compile, '(?L)')
1442 self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
1443 self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
1444 self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
1445 self.assertRaises(ValueError, re.compile, b'(?aL)')
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001446
def test_scoped_flags(self):
    # (?i:...) / (?-i:...) switch case-insensitivity for the group only.
    self.assertTrue(re.match(r'(?i:a)b', 'Ab'))
    self.assertIsNone(re.match(r'(?i:a)b', 'aB'))
    self.assertIsNone(re.match(r'(?-i:a)b', 'Ab', re.IGNORECASE))
    self.assertTrue(re.match(r'(?-i:a)b', 'aB', re.IGNORECASE))
    self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
    self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))

    # Same for verbose mode: whitespace is ignored only where x is on.
    self.assertTrue(re.match(r'(?x: a) b', 'a b'))
    self.assertIsNone(re.match(r'(?x: a) b', ' a b'))
    self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE))
    self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE))

    # Global-only flags cannot be scoped; contradictory flags are errors.
    self.checkPatternError(r'(?a:\w)',
            'bad inline flags: cannot turn on global flag', 3)
    self.checkPatternError(r'(?a)(?-a:\w)',
            'bad inline flags: cannot turn off global flag', 8)
    self.checkPatternError(r'(?i-i:a)',
            'bad inline flags: flag turned on and off', 5)

    # Truncated or malformed flag groups.
    self.checkPatternError(r'(?-', 'missing flag', 3)
    self.checkPatternError(r'(?-+', 'missing flag', 3)
    self.checkPatternError(r'(?-z', 'unknown flag', 3)
    self.checkPatternError(r'(?-i', 'missing :', 4)
    self.checkPatternError(r'(?-i)', 'missing :', 4)
    self.checkPatternError(r'(?-i+', 'missing :', 4)
    self.checkPatternError(r'(?-iz', 'unknown flag', 4)
    self.checkPatternError(r'(?i:', 'missing ), unterminated subpattern', 0)
    self.checkPatternError(r'(?i', 'missing -, : or )', 3)
    self.checkPatternError(r'(?i+', 'missing -, : or )', 3)
    self.checkPatternError(r'(?iz', 'unknown flag', 3)
1478
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001479 def test_bug_6509(self):
1480 # Replacement strings of both types must parse properly.
1481 # all strings
R David Murray44b548d2016-09-08 13:59:53 -04001482 pat = re.compile(r'a(\w)')
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001483 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
1484 pat = re.compile('a(.)')
1485 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
1486 pat = re.compile('..')
1487 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
1488
1489 # all bytes
R David Murray44b548d2016-09-08 13:59:53 -04001490 pat = re.compile(br'a(\w)')
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001491 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
1492 pat = re.compile(b'a(.)')
1493 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
1494 pat = re.compile(b'..')
1495 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
1496
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001497 def test_dealloc(self):
1498 # issue 3299: check for segfault in debug build
1499 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +00001500 # the overflow limit is different on wide and narrow builds and it
1501 # depends on the definition of SRE_CODE (see sre.h).
1502 # 2**128 should be big enough to overflow on both. For smaller values
1503 # a RuntimeError is raised instead of OverflowError.
1504 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001505 self.assertRaises(TypeError, re.finditer, "a", {})
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001506 with self.assertRaises(OverflowError):
Victor Stinner726a57d2016-11-22 23:04:39 +01001507 _sre.compile("abc", 0, [long_overflow], 0, {}, ())
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001508 with self.assertRaises(TypeError):
1509 _sre.compile({}, 0, [], 0, [], [])
Christian Heimes072c0f12008-01-03 23:01:04 +00001510
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001511 def test_search_dot_unicode(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001512 self.assertTrue(re.search("123.*-", '123abc-'))
1513 self.assertTrue(re.search("123.*-", '123\xe9-'))
1514 self.assertTrue(re.search("123.*-", '123\u20ac-'))
1515 self.assertTrue(re.search("123.*-", '123\U0010ffff-'))
1516 self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001517
Ezio Melottidf723e12012-03-13 01:29:48 +02001518 def test_compile(self):
1519 # Test return value when given string and pattern as parameter
1520 pattern = re.compile('random pattern')
1521 self.assertIsInstance(pattern, re._pattern_type)
1522 same_pattern = re.compile(pattern)
1523 self.assertIsInstance(same_pattern, re._pattern_type)
1524 self.assertIs(same_pattern, pattern)
1525 # Test behaviour when not given a string or pattern as parameter
1526 self.assertRaises(TypeError, re.compile, 0)
1527
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # '$' must be found at the very end of a string of `size` characters,
    # with start/end reported as `size`.
    s = 'a' * size
    m = re.search('$', s)
    self.assertIsNotNone(m)
    self.assertEqual(m.start(), size)
    self.assertEqual(m.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001536
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # Replacing the empty pattern with '' leaves the string unchanged and
    # must count one substitution per position, i.e. size + 1.
    s = 'a' * size
    r, n = re.subn('', '', s)
    self.assertEqual(r, s)
    self.assertEqual(n, size + 1)
1546
Serhiy Storchakac1b59d42012-12-29 23:38:48 +02001547 def test_bug_16688(self):
1548 # Issue 16688: Backreferences make case-insensitive regex fail on
1549 # non-ASCII strings.
1550 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1551 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001552
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001553 def test_repeat_minmax_overflow(self):
1554 # Issue #13169
1555 string = "x" * 100000
1556 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1557 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1558 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1559 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1560 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1561 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1562 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1563 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1564 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1565 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1566 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1567
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    string = "x" * 100000
    # MAXREPEAT - 1 is accepted as a count; the 100000-character subject is
    # too short to satisfy it as a minimum, and as a maximum it lets the
    # whole subject match.
    self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
    self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
                     (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
    # MAXREPEAT itself is out of range for an explicit count.
    self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
    self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
    self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1582
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The offending name of a named backreference must appear in the error.
    self.checkPatternError('(?P=<foo>)',
                           "bad character in group name '<foo>'", 4)
R David Murray26dfaac92013-04-14 13:00:54 -04001587
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The offending name of a named group must appear in the error.
    self.checkPatternError('(?P<?foo>)',
                           "bad character in group name '?foo'", 4)
R David Murray26dfaac92013-04-14 13:00:54 -04001592
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001593 def test_issue17998(self):
1594 for reps in '*', '+', '?', '{1}':
1595 for mod in '', '?':
1596 pattern = '.' + reps + mod + 'yz'
1597 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1598 ['xyz'], msg=pattern)
1599 pattern = pattern.encode()
1600 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1601 [b'xyz'], msg=pattern)
1602
def test_match_repr(self):
    # repr() of a match shows the type's qualified name, the span and the
    # matched text, for str subjects (including str subclasses) ...
    for string in '[abracadabra]', S('[abracadabra]'):
        m = re.search(r'(.+)(.*?)\1', string)
        self.assertEqual(repr(m), "<%s.%s object; "
                         "span=(1, 12), match='abracadabra'>" %
                         (type(m).__module__, type(m).__qualname__))
    # ... and for bytes-like subjects, where the text is shown as bytes.
    for string in (b'[abracadabra]', B(b'[abracadabra]'),
                   bytearray(b'[abracadabra]'),
                   memoryview(b'[abracadabra]')):
        m = re.search(br'(.+)(.*?)\1', string)
        self.assertEqual(repr(m), "<%s.%s object; "
                         "span=(1, 12), match=b'abracadabra'>" %
                         (type(m).__module__, type(m).__qualname__))

    # Each match from finditer() is checked against its own type.
    first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
    self.assertEqual(repr(first), "<%s.%s object; "
                     "span=(0, 2), match='aa'>" %
                     (type(first).__module__, type(first).__qualname__))
    self.assertEqual(repr(second), "<%s.%s object; "
                     "span=(3, 5), match='bb'>" %
                     (type(second).__module__, type(second).__qualname__))
1624
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001625
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001626 def test_bug_2537(self):
1627 # issue 2537: empty submatches
1628 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1629 for inner_op in ('{0,}', '*', '?'):
1630 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1631 m = r.match("xyyzy")
1632 self.assertEqual(m.group(0), "xyy")
1633 self.assertEqual(m.group(1), "")
1634 self.assertEqual(m.group(2), "y")
1635
def test_debug_flag(self):
    # Compiling with re.DEBUG must print the parsed pattern tree.
    pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
    with captured_stdout() as out:
        re.compile(pat, re.DEBUG)
    # NOTE(review): the nesting whitespace inside this literal was
    # flattened in the copy this block was restored from; the two-space
    # per-level layout below is reconstructed -- confirm against the
    # actual re.DEBUG output of the target interpreter.
    dump = '''\
SUBPATTERN 1 0 0
  LITERAL 46
SUBPATTERN None 0 0
  BRANCH
    IN
      LITERAL 99
      LITERAL 104
  OR
    LITERAL 112
    LITERAL 121
SUBPATTERN None 0 0
  GROUPREF_EXISTS 1
    AT AT_END
  ELSE
    LITERAL 58
    LITERAL 32
'''
    self.assertEqual(out.getvalue(), dump)
    # Debug output is output again even a second time (bypassing
    # the cache -- issue #20426).
    with captured_stdout() as out:
        re.compile(pat, re.DEBUG)
    self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001664
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001665 def test_keyword_parameters(self):
1666 # Issue #20283: Accepting the string keyword parameter.
1667 pat = re.compile(r'(ab)')
1668 self.assertEqual(
1669 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1670 self.assertEqual(
Serhiy Storchakaa537eb42014-03-06 11:36:15 +02001671 pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
1672 self.assertEqual(
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001673 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1674 self.assertEqual(
1675 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1676 self.assertEqual(
1677 pat.split(string='abracadabra', maxsplit=1),
1678 ['', 'ab', 'racadabra'])
1679 self.assertEqual(
1680 pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
1681 (7, 9))
1682
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001683 def test_bug_20998(self):
1684 # Issue #20998: Fullmatch of repeated single character pattern
1685 # with ignore case.
1686 self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
1687
def test_locale_caching(self):
    # Issue #22410
    # Save the current LC_CTYPE and restore it when the test finishes.
    oldlocale = locale.setlocale(locale.LC_CTYPE)
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
    # Both locales must be installable, otherwise the test is skipped.
    for loc in 'en_US.iso88591', 'en_US.utf8':
        try:
            locale.setlocale(locale.LC_CTYPE, loc)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % loc)

    # Run the two locale checks in both orders, starting from an empty
    # pattern cache each time.
    re.purge()
    self.check_en_US_iso88591()
    self.check_en_US_utf8()
    re.purge()
    self.check_en_US_utf8()
    self.check_en_US_iso88591()
1705
1706 def check_en_US_iso88591(self):
1707 locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
1708 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1709 self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
1710 self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
1711 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1712 self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
1713 self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
1714
1715 def check_en_US_utf8(self):
1716 locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
1717 self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
1718 self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
1719 self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
1720 self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
1721 self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
1722 self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
1723
Serhiy Storchakaad446d52014-11-10 13:49:00 +02001724 def test_error(self):
1725 with self.assertRaises(re.error) as cm:
1726 re.compile('(\u20ac))')
1727 err = cm.exception
1728 self.assertIsInstance(err.pattern, str)
1729 self.assertEqual(err.pattern, '(\u20ac))')
1730 self.assertEqual(err.pos, 3)
1731 self.assertEqual(err.lineno, 1)
1732 self.assertEqual(err.colno, 4)
1733 self.assertIn(err.msg, str(err))
1734 self.assertIn(' at position 3', str(err))
1735 self.assertNotIn(' at position 3', err.msg)
1736 # Bytes pattern
1737 with self.assertRaises(re.error) as cm:
1738 re.compile(b'(\xa4))')
1739 err = cm.exception
1740 self.assertIsInstance(err.pattern, bytes)
1741 self.assertEqual(err.pattern, b'(\xa4))')
1742 self.assertEqual(err.pos, 3)
1743 # Multiline pattern
1744 with self.assertRaises(re.error) as cm:
1745 re.compile("""
1746 (
1747 abc
1748 )
1749 )
1750 (
1751 """, re.VERBOSE)
1752 err = cm.exception
1753 self.assertEqual(err.pos, 77)
1754 self.assertEqual(err.lineno, 5)
1755 self.assertEqual(err.colno, 17)
1756 self.assertIn(err.msg, str(err))
1757 self.assertIn(' at position 77', str(err))
1758 self.assertIn('(line 5, column 17)', str(err))
1759
Serhiy Storchaka632a77e2015-03-25 21:03:47 +02001760 def test_misc_errors(self):
1761 self.checkPatternError(r'(', 'missing ), unterminated subpattern', 0)
1762 self.checkPatternError(r'((a|b)', 'missing ), unterminated subpattern', 0)
1763 self.checkPatternError(r'(a|b))', 'unbalanced parenthesis', 5)
1764 self.checkPatternError(r'(?P', 'unexpected end of pattern', 3)
1765 self.checkPatternError(r'(?z)', 'unknown extension ?z', 1)
1766 self.checkPatternError(r'(?iz)', 'unknown flag', 3)
Serhiy Storchakabe9a4e52016-09-10 00:57:55 +03001767 self.checkPatternError(r'(?i', 'missing -, : or )', 3)
Serhiy Storchaka632a77e2015-03-25 21:03:47 +02001768 self.checkPatternError(r'(?#abc', 'missing ), unterminated comment', 0)
1769 self.checkPatternError(r'(?<', 'unexpected end of pattern', 3)
1770 self.checkPatternError(r'(?<>)', 'unknown extension ?<>', 1)
1771 self.checkPatternError(r'(?', 'unexpected end of pattern', 2)
1772
Victor Stinner8bf43e62016-11-14 12:38:43 +01001773 def test_enum(self):
1774 # Issue #28082: Check that str(flag) returns a human readable string
1775 # instead of an integer
1776 self.assertIn('ASCII', str(re.A))
1777 self.assertIn('DOTALL', str(re.S))
1778
Victor Stinnerb44fb122016-11-21 16:35:08 +01001779 def test_pattern_compare(self):
1780 pattern1 = re.compile('abc', re.IGNORECASE)
1781
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01001782 # equal to itself
1783 self.assertEqual(pattern1, pattern1)
1784 self.assertFalse(pattern1 != pattern1)
1785
Victor Stinnerb44fb122016-11-21 16:35:08 +01001786 # equal
1787 re.purge()
1788 pattern2 = re.compile('abc', re.IGNORECASE)
1789 self.assertEqual(hash(pattern2), hash(pattern1))
1790 self.assertEqual(pattern2, pattern1)
1791
1792 # not equal: different pattern
1793 re.purge()
1794 pattern3 = re.compile('XYZ', re.IGNORECASE)
1795 # Don't test hash(pattern3) != hash(pattern1) because there is no
1796 # warranty that hash values are different
1797 self.assertNotEqual(pattern3, pattern1)
1798
1799 # not equal: different flag (flags=0)
1800 re.purge()
1801 pattern4 = re.compile('abc')
1802 self.assertNotEqual(pattern4, pattern1)
1803
1804 # only == and != comparison operators are supported
1805 with self.assertRaises(TypeError):
1806 pattern1 < pattern2
1807
1808 def test_pattern_compare_bytes(self):
1809 pattern1 = re.compile(b'abc')
1810
1811 # equal: test bytes patterns
1812 re.purge()
1813 pattern2 = re.compile(b'abc')
1814 self.assertEqual(hash(pattern2), hash(pattern1))
1815 self.assertEqual(pattern2, pattern1)
1816
1817 # not equal: pattern of a different types (str vs bytes),
1818 # comparison must not raise a BytesWarning
1819 re.purge()
1820 pattern3 = re.compile('abc')
1821 with warnings.catch_warnings():
1822 warnings.simplefilter('error', BytesWarning)
1823 self.assertNotEqual(pattern3, pattern1)
1824
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001825 def test_bug_29444(self):
1826 s = bytearray(b'abcdefgh')
1827 m = re.search(b'[a-h]+', s)
1828 m2 = re.search(b'[e-h]+', s)
1829 self.assertEqual(m.group(), b'abcdefgh')
1830 self.assertEqual(m2.group(), b'efgh')
1831 s[:] = b'xyz'
1832 self.assertEqual(m.group(), b'xyz')
1833 self.assertEqual(m2.group(), b'')
1834
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02001835
class PatternReprTests(unittest.TestCase):
    """Checks on the text returned by repr() for compiled patterns."""

    def check(self, pattern, expected):
        """Assert that *pattern* compiled without flags has repr *expected*."""
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        """Assert that *pattern* compiled with *flags* has repr *expected*."""
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        expected = "re.compile('random pattern')"
        self.check('random pattern', expected)

    def test_single_flag(self):
        expected = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, expected)

    def test_multiple_flags(self):
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', re.I|re.S|re.X, expected)

    def test_unicode_flag(self):
        # re.U is not expected to show up in the repr of a str pattern.
        expectations = (
            (re.U, "re.compile('random pattern')"),
            (re.I|re.S|re.U,
             "re.compile('random pattern', "
             "re.IGNORECASE|re.DOTALL)"),
        )
        for flags, expected in expectations:
            self.check_flags('random pattern', flags, expected)

    def test_inline_flags(self):
        expected = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', expected)

    def test_unknown_flags(self):
        # Bits with no named flag are expected to be rendered in hex.
        expectations = (
            (0x123000, "re.compile('random pattern', 0x123000)"),
            (0x123000|re.I,
             "re.compile('random pattern', re.IGNORECASE|0x123000)"),
        )
        for flags, expected in expectations:
            self.check_flags('random pattern', flags, expected)

    def test_bytes(self):
        self.check(b'bytes pattern', "re.compile(b'bytes pattern')")
        self.check_flags(b'bytes pattern', re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_locale(self):
        expected = "re.compile(b'bytes pattern', re.LOCALE)"
        self.check_flags(b'bytes pattern', re.L, expected)

    def test_quotes(self):
        expectations = (
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        )
        for pattern, expected in expectations:
            self.check(pattern, expected)

    def test_long_pattern(self):
        # The repr of a very long pattern must stay under 300 characters
        # while keeping the start of the pattern and any trailing flags.
        pattern = 'Very %spattern' % ('long ' * 1000)
        expected_head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], expected_head)

        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], expected_head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")
1900
1901
class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        # Each entry pairs an input string with the table that
        # sre_compile._generate_overlap_table must return for it.
        build_table = sre_compile._generate_overlap_table
        expectations = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, table in expectations:
            self.assertEqual(build_table(prefix), table)
1915
1916
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001917class ExternalTests(unittest.TestCase):
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001918
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001919 def test_re_benchmarks(self):
1920 're_tests benchmarks'
1921 from test.re_tests import benchmarks
1922 for pattern, s in benchmarks:
1923 with self.subTest(pattern=pattern, string=s):
1924 p = re.compile(pattern)
1925 self.assertTrue(p.search(s))
1926 self.assertTrue(p.match(s))
1927 self.assertTrue(p.fullmatch(s))
1928 s2 = ' '*10000 + s + ' '*10000
1929 self.assertTrue(p.search(s2))
1930 self.assertTrue(p.match(s2, 10000))
1931 self.assertTrue(p.match(s2, 10000, 10000 + len(s)))
1932 self.assertTrue(p.fullmatch(s2, 10000, 10000 + len(s)))
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001933
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001934 def test_re_tests(self):
1935 're_tests test suite'
1936 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
1937 for t in tests:
1938 pattern = s = outcome = repl = expected = None
1939 if len(t) == 5:
1940 pattern, s, outcome, repl, expected = t
1941 elif len(t) == 3:
1942 pattern, s, outcome = t
Guido van Rossum41360a41998-03-26 19:42:58 +00001943 else:
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001944 raise ValueError('Test tuples should have 3 or 5 fields', t)
1945
1946 with self.subTest(pattern=pattern, string=s):
1947 if outcome == SYNTAX_ERROR: # Expected a syntax error
1948 with self.assertRaises(re.error):
1949 re.compile(pattern)
1950 continue
1951
1952 obj = re.compile(pattern)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001953 result = obj.search(s)
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001954 if outcome == FAIL:
1955 self.assertIsNone(result, 'Succeeded incorrectly')
1956 continue
1957
1958 with self.subTest():
1959 self.assertTrue(result, 'Failed incorrectly')
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001960 # Matched, as expected, so now we compute the
1961 # result string and compare it to our expected result.
1962 start, end = result.span(0)
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001963 vardict = {'found': result.group(0),
1964 'groups': result.group(),
1965 'flags': result.re.flags}
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001966 for i in range(1, 100):
1967 try:
1968 gi = result.group(i)
1969 # Special hack because else the string concat fails:
1970 if gi is None:
1971 gi = "None"
1972 except IndexError:
1973 gi = "Error"
1974 vardict['g%d' % i] = gi
1975 for i in result.re.groupindex.keys():
1976 try:
1977 gi = result.group(i)
1978 if gi is None:
1979 gi = "None"
1980 except IndexError:
1981 gi = "Error"
1982 vardict[i] = gi
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001983 self.assertEqual(eval(repl, vardict), expected,
1984 'grouping error')
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001985
Antoine Pitrou22628c42008-07-22 17:53:22 +00001986 # Try the match with both pattern and string converted to
1987 # bytes, and check that it still succeeds.
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001988 try:
Antoine Pitrou22628c42008-07-22 17:53:22 +00001989 bpat = bytes(pattern, "ascii")
1990 bs = bytes(s, "ascii")
1991 except UnicodeEncodeError:
1992 # skip non-ascii tests
1993 pass
1994 else:
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001995 with self.subTest('bytes pattern match'):
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001996 obj = re.compile(bpat)
1997 self.assertTrue(obj.search(bs))
1998
1999 # Try the match with LOCALE enabled, and check that it
2000 # still succeeds.
2001 with self.subTest('locale-sensitive match'):
2002 obj = re.compile(bpat, re.LOCALE)
2003 result = obj.search(bs)
2004 if result is None:
2005 print('=== Fails on locale-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00002006
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002007 # Try the match with the search area limited to the extent
2008 # of the match and see if it still succeeds. \B will
2009 # break (because it won't match at the end or start of a
2010 # string), so we'll ignore patterns that feature it.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002011 if (pattern[:2] != r'\B' and pattern[-2:] != r'\B'
2012 and result is not None):
2013 with self.subTest('range-limited match'):
2014 obj = re.compile(pattern)
2015 self.assertTrue(obj.search(s, start, end + 1))
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00002016
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002017 # Try the match with IGNORECASE enabled, and check that it
2018 # still succeeds.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002019 with self.subTest('case-insensitive match'):
2020 obj = re.compile(pattern, re.IGNORECASE)
2021 self.assertTrue(obj.search(s))
Guido van Rossumdfa67901997-12-08 17:12:06 +00002022
Skip Montanaro8ed06da2003-04-24 19:43:18 +00002023 # Try the match with UNICODE locale enabled, and check
2024 # that it still succeeds.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02002025 with self.subTest('unicode-sensitive match'):
2026 obj = re.compile(pattern, re.UNICODE)
2027 self.assertTrue(obj.search(s))
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00002028
Gregory P. Smith5a631832010-07-27 05:31:29 +00002029
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()