blob: 0fbf8c5a23795fa25d2c4d93559ef9624a01fd5d [file] [log] [blame]
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
Antoine Pitroud2cc7432014-02-03 20:59:59 +01002 cpython_only, captured_stdout
Benjamin Petersone48944b2012-03-07 14:50:25 -06003import io
Serhiy Storchaka4659cc02014-10-31 00:53:49 +02004import locale
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00006from re import Scanner
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02007import sre_compile
R David Murray26dfaac92013-04-14 13:00:54 -04008import sre_constants
Ezio Melottid2114eb2011-03-25 14:08:44 +02009import sys
10import string
11import traceback
Antoine Pitrou79aa68d2013-10-25 21:36:10 +020012import unittest
Raymond Hettinger027bb632004-05-31 03:09:25 +000013from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000014
# Misc tests from Tim Peters' re.doc
16
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
20
class S(str):
    # str subclass whose item access re-wraps the result in S, so indexing
    # and slicing never hand back a plain str.
    def __getitem__(self, index):
        return S(super().__getitem__(index))
24
class B(bytes):
    # bytes subclass whose item access re-wraps the result in B, so slicing
    # never hands back a plain bytes object.
    # NOTE: an integer index makes bytes.__getitem__ return an int, and
    # B(int) then builds a zero-filled B of that length; only slices
    # round-trip the content.
    def __getitem__(self, index):
        return B(super().__getitem__(index))
28
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000030
Serhiy Storchaka25324972013-10-16 12:46:28 +030031 def assertTypedEqual(self, actual, expect, msg=None):
32 self.assertEqual(actual, expect, msg)
33 def recurse(actual, expect):
34 if isinstance(expect, (tuple, list)):
35 for x, y in zip(actual, expect):
36 recurse(x, y)
37 else:
38 self.assertIs(type(actual), type(expect), msg)
39 recurse(actual, expect)
40
Benjamin Petersone48944b2012-03-07 14:50:25 -060041 def test_keep_buffer(self):
42 # See bug 14212
43 b = bytearray(b'x')
44 it = re.finditer(b'a', b)
45 with self.assertRaises(BufferError):
46 b.extend(b'x'*400)
47 list(it)
48 del it
49 gc_collect()
50 b.extend(b'x'*400)
51
Raymond Hettinger027bb632004-05-31 03:09:25 +000052 def test_weakref(self):
53 s = 'QabbbcR'
54 x = re.compile('ab+c')
55 y = proxy(x)
56 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
57
Skip Montanaro8ed06da2003-04-24 19:43:18 +000058 def test_search_star_plus(self):
59 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
60 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
61 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
62 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030063 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000064 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
65 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
66 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
67 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +030068 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000069
Skip Montanaro8ed06da2003-04-24 19:43:18 +000070 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000071 int_value = int(matchobj.group(0))
72 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000073
Skip Montanaro8ed06da2003-04-24 19:43:18 +000074 def test_basic_re_sub(self):
Serhiy Storchaka25324972013-10-16 12:46:28 +030075 self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
76 self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
77 self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
78 self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
79 self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
80 self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030081 for y in ("\xe0", "\u0430", "\U0001d49c"):
82 self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
Serhiy Storchaka25324972013-10-16 12:46:28 +030083
Skip Montanaro8ed06da2003-04-24 19:43:18 +000084 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
85 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
86 '9.3 -3 24x100y')
Victor Stinner55e614a2014-10-29 16:58:59 +010087 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
Skip Montanaro8ed06da2003-04-24 19:43:18 +000088 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000089
Skip Montanaro8ed06da2003-04-24 19:43:18 +000090 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
91 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000092
Skip Montanaro8ed06da2003-04-24 19:43:18 +000093 s = r"\1\1"
94 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
95 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
96 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000097
Skip Montanaro8ed06da2003-04-24 19:43:18 +000098 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
99 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
100 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
101 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +0000102
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000103 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
104 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
105 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
106 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
107 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +0000108
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000109 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +0000110
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000111 def test_bug_449964(self):
112 # fails for group followed by other escape
113 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
114 'xx\bxx\b')
115
116 def test_bug_449000(self):
117 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000118 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
119 'abc\ndef\n')
120 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
121 'abc\ndef\n')
122 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
123 'abc\ndef\n')
124 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
125 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +0000126
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000127 def test_bug_1661(self):
128 # Verify that flags do not get silently ignored with compiled patterns
129 pattern = re.compile('.')
130 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
131 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
132 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
133 self.assertRaises(ValueError, re.compile, pattern, re.I)
134
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000135 def test_bug_3629(self):
136 # A regex that triggered a bug in the sre-code validator
137 re.compile("(?P<quote>)(?(quote))")
138
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000139 def test_sub_template_numeric_escape(self):
140 # bug 776311 and friends
141 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
142 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
143 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
144 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
145 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
146 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
147 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
148
149 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
150 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
151
152 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
153 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
154 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
155 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
156 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
157
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300158 self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
159 self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
Tim Peters0e9980f2004-09-12 03:49:31 +0000160
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000161 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
162 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
163 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
164 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
165 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
166 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
167 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
168 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
169 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
170 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
171 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
172 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
173
174 # in python2.3 (etc), these loop endlessly in sre_parser.py
175 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
176 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
177 'xz8')
178 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
179 'xza')
180
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000181 def test_qualified_re_sub(self):
182 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
Victor Stinner55e614a2014-10-29 16:58:59 +0100183 self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000184
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000185 def test_bug_114660(self):
186 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
187 'hello there')
188
189 def test_bug_462270(self):
190 # Test for empty sub() behaviour, see SF bug #462270
191 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
192 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
193
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200194 def test_symbolic_groups(self):
195 re.compile('(?P<a>x)(?P=a)(?(a)y)')
196 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300197 re.compile('(?P<a1>x)\1(?(1)y)')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200198 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
199 self.assertRaises(re.error, re.compile, '(?Px)')
200 self.assertRaises(re.error, re.compile, '(?P=)')
201 self.assertRaises(re.error, re.compile, '(?P=1)')
202 self.assertRaises(re.error, re.compile, '(?P=a)')
203 self.assertRaises(re.error, re.compile, '(?P=a1)')
204 self.assertRaises(re.error, re.compile, '(?P=a.)')
205 self.assertRaises(re.error, re.compile, '(?P<)')
206 self.assertRaises(re.error, re.compile, '(?P<>)')
207 self.assertRaises(re.error, re.compile, '(?P<1>)')
208 self.assertRaises(re.error, re.compile, '(?P<a.>)')
209 self.assertRaises(re.error, re.compile, '(?())')
210 self.assertRaises(re.error, re.compile, '(?(a))')
211 self.assertRaises(re.error, re.compile, '(?(1a))')
212 self.assertRaises(re.error, re.compile, '(?(a.))')
Georg Brandl1d472b72013-04-14 11:40:00 +0200213 # New valid/invalid identifiers in Python 3
214 re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
215 re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
216 self.assertRaises(re.error, re.compile, '(?P<©>x)')
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300217 # Support > 100 groups.
218 pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
219 pat = '(?:%s)(?(200)z|t)' % pat
220 self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200221
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000222 def test_symbolic_refs(self):
223 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
224 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
225 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
226 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200227 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000228 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
Serhiy Storchaka7438e4b2014-10-10 11:06:31 +0300229 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<2>', 'xx')
230 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\2', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000231 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
Serhiy Storchaka7438e4b2014-10-10 11:06:31 +0300232 self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
233 self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000234 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Georg Brandl1d472b72013-04-14 11:40:00 +0200235 # New valid/invalid identifiers in Python 3
236 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
237 self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
238 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300239 # Support > 100 groups.
240 pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
241 self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000242
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000243 def test_re_subn(self):
244 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
245 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
246 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
247 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
Victor Stinner55e614a2014-10-29 16:58:59 +0100248 self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000249
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000250 def test_re_split(self):
Serhiy Storchaka25324972013-10-16 12:46:28 +0300251 for string in ":a:b::c", S(":a:b::c"):
252 self.assertTypedEqual(re.split(":", string),
253 ['', 'a', 'b', '', 'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200254 self.assertTypedEqual(re.split(":+", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300255 ['', 'a', 'b', 'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200256 self.assertTypedEqual(re.split("(:+)", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300257 ['', ':', 'a', ':', 'b', '::', 'c'])
258 for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
259 memoryview(b":a:b::c")):
260 self.assertTypedEqual(re.split(b":", string),
261 [b'', b'a', b'b', b'', b'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200262 self.assertTypedEqual(re.split(b":+", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300263 [b'', b'a', b'b', b'c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200264 self.assertTypedEqual(re.split(b"(:+)", string),
Serhiy Storchaka25324972013-10-16 12:46:28 +0300265 [b'', b':', b'a', b':', b'b', b'::', b'c'])
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300266 for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
267 "\U0001d49c\U0001d49e\U0001d4b5"):
268 string = ":%s:%s::%s" % (a, b, c)
269 self.assertEqual(re.split(":", string), ['', a, b, '', c])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200270 self.assertEqual(re.split(":+", string), ['', a, b, c])
271 self.assertEqual(re.split("(:+)", string),
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300272 ['', ':', a, ':', b, '::', c])
Serhiy Storchaka25324972013-10-16 12:46:28 +0300273
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200274 self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
275 self.assertEqual(re.split("(:)+", ":a:b::c"),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000276 ['', ':', 'a', ':', 'b', ':', 'c'])
277 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
278 ['', ':', 'a', ':b::', 'c'])
279 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
280 ['', None, ':', 'a', None, ':', '', 'b', None, '',
281 None, '::', 'c'])
282 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
283 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000284
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200285 for sep, expected in [
286 (':*', ['', 'a', 'b', 'c']),
287 ('(?::*)', ['', 'a', 'b', 'c']),
288 ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
289 ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
290 ]:
291 with self.subTest(sep=sep), self.assertWarns(FutureWarning):
292 self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
293
294 for sep, expected in [
295 ('', [':a:b::c']),
296 (r'\b', [':a:b::c']),
297 (r'(?=:)', [':a:b::c']),
298 (r'(?<=:)', [':a:b::c']),
299 ]:
300 with self.subTest(sep=sep), self.assertRaises(ValueError):
301 self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
302
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000303 def test_qualified_re_split(self):
Victor Stinner55e614a2014-10-29 16:58:59 +0100304 self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
305 self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
306 self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000307 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200308 self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000309 ['', ':', 'a', ':', 'b::c'])
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200310 with self.assertWarns(FutureWarning):
311 self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
312 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000313
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000314 def test_re_findall(self):
315 self.assertEqual(re.findall(":+", "abc"), [])
Serhiy Storchaka25324972013-10-16 12:46:28 +0300316 for string in "a:b::c:::d", S("a:b::c:::d"):
317 self.assertTypedEqual(re.findall(":+", string),
318 [":", "::", ":::"])
319 self.assertTypedEqual(re.findall("(:+)", string),
320 [":", "::", ":::"])
321 self.assertTypedEqual(re.findall("(:)(:*)", string),
322 [(":", ""), (":", ":"), (":", "::")])
323 for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
324 memoryview(b"a:b::c:::d")):
325 self.assertTypedEqual(re.findall(b":+", string),
326 [b":", b"::", b":::"])
327 self.assertTypedEqual(re.findall(b"(:+)", string),
328 [b":", b"::", b":::"])
329 self.assertTypedEqual(re.findall(b"(:)(:*)", string),
330 [(b":", b""), (b":", b":"), (b":", b"::")])
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300331 for x in ("\xe0", "\u0430", "\U0001d49c"):
332 xx = x * 2
333 xxx = x * 3
334 string = "a%sb%sc%sd" % (x, xx, xxx)
335 self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
336 self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
337 self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
338 [(x, ""), (x, x), (x, xx)])
Guido van Rossum49946571997-07-18 04:26:25 +0000339
Skip Montanaro5ba00542003-04-25 16:00:14 +0000340 def test_bug_117612(self):
341 self.assertEqual(re.findall(r"(a|(b))", "aba"),
342 [("a", ""),("b", "b"),("a", "")])
343
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000344 def test_re_match(self):
Serhiy Storchaka25324972013-10-16 12:46:28 +0300345 for string in 'a', S('a'):
346 self.assertEqual(re.match('a', string).groups(), ())
347 self.assertEqual(re.match('(a)', string).groups(), ('a',))
348 self.assertEqual(re.match('(a)', string).group(0), 'a')
349 self.assertEqual(re.match('(a)', string).group(1), 'a')
350 self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
351 for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
352 self.assertEqual(re.match(b'a', string).groups(), ())
353 self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
354 self.assertEqual(re.match(b'(a)', string).group(0), b'a')
355 self.assertEqual(re.match(b'(a)', string).group(1), b'a')
356 self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300357 for a in ("\xe0", "\u0430", "\U0001d49c"):
358 self.assertEqual(re.match(a, a).groups(), ())
359 self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
360 self.assertEqual(re.match('(%s)' % a, a).group(0), a)
361 self.assertEqual(re.match('(%s)' % a, a).group(1), a)
362 self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
Guido van Rossum49946571997-07-18 04:26:25 +0000363
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000364 pat = re.compile('((a)|(b))(c)?')
365 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
366 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
367 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
368 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
369 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000370
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000371 # A single group
372 m = re.match('(a)', 'a')
373 self.assertEqual(m.group(0), 'a')
374 self.assertEqual(m.group(0), 'a')
375 self.assertEqual(m.group(1), 'a')
376 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000377
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000378 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
379 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
380 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
381 (None, 'b', None))
382 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000383
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200384 def test_re_fullmatch(self):
385 # Issue 16203: Proposal: add re.fullmatch() method.
386 self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
387 for string in "ab", S("ab"):
388 self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2))
389 for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
390 self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2))
391 for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
392 r = r"%s|%s" % (a, a + b)
393 self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
394 self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
395 self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
396 self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
397 self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
398 self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
399 self.assertIsNone(re.fullmatch(r"a+", "ab"))
400 self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
401 self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
402 self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
403 self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
404 self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
405 self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))
406
407 self.assertEqual(
408 re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
409 self.assertEqual(
410 re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
411 self.assertEqual(
412 re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
413
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000414 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000415 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
416 ('(', 'a'))
417 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
418 (None, 'a'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300419 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
420 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000421 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
422 ('a', 'b'))
423 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
424 (None, 'd'))
425 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
426 (None, 'd'))
427 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
428 ('a', ''))
429
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000430 # Tests for bug #1177831: exercise groups other than the first group
431 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
432 self.assertEqual(p.match('abc').groups(),
433 ('a', 'b', 'c'))
434 self.assertEqual(p.match('ad').groups(),
435 ('a', None, 'd'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300436 self.assertIsNone(p.match('abd'))
437 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000438
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300439 # Support > 100 groups.
440 pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
441 pat = '(?:%s)(?(200)z)' % pat
442 self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000443
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000444 def test_re_groupref(self):
445 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
446 ('|', 'a'))
447 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
448 (None, 'a'))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300449 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
450 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000451 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
452 ('a', 'a'))
453 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
454 (None, None))
455
456 def test_groupdict(self):
457 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
458 'first second').groupdict(),
459 {'first':'first', 'second':'second'})
460
461 def test_expand(self):
462 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
463 "first second")
464 .expand(r"\2 \1 \g<second> \g<first>"),
465 "second first second first")
Serhiy Storchaka7438e4b2014-10-10 11:06:31 +0300466 self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
467 "first")
468 .expand(r"\2 \g<second>"),
469 " ")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000470
471 def test_repeat_minmax(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300472 self.assertIsNone(re.match("^(\w){1}$", "abc"))
473 self.assertIsNone(re.match("^(\w){1}?$", "abc"))
474 self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
475 self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000476
477 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
478 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
479 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
480 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
481 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
482 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
483 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
484 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
485
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300486 self.assertIsNone(re.match("^x{1}$", "xxx"))
487 self.assertIsNone(re.match("^x{1}?$", "xxx"))
488 self.assertIsNone(re.match("^x{1,2}$", "xxx"))
489 self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000490
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300491 self.assertTrue(re.match("^x{3}$", "xxx"))
492 self.assertTrue(re.match("^x{1,3}$", "xxx"))
493 self.assertTrue(re.match("^x{1,4}$", "xxx"))
494 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
495 self.assertTrue(re.match("^x{3}?$", "xxx"))
496 self.assertTrue(re.match("^x{1,3}?$", "xxx"))
497 self.assertTrue(re.match("^x{1,4}?$", "xxx"))
498 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000499
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300500 self.assertIsNone(re.match("^x{}$", "xxx"))
501 self.assertTrue(re.match("^x{}$", "x{}"))
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000502
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000503 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000504 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000505 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000506 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
507 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
508 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
509 {'first': 1, 'other': 2})
510
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000511 self.assertEqual(re.match("(a)", "a").pos, 0)
512 self.assertEqual(re.match("(a)", "a").endpos, 1)
513 self.assertEqual(re.match("(a)", "a").string, "a")
514 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300515 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000516
517 def test_special_escapes(self):
518 self.assertEqual(re.search(r"\b(b.)\b",
519 "abcd abc bcd bx").group(1), "bx")
520 self.assertEqual(re.search(r"\B(b.)\B",
521 "abc bcd bc abxd").group(1), "bx")
522 self.assertEqual(re.search(r"\b(b.)\b",
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300523 "abcd abc bcd bx", re.ASCII).group(1), "bx")
524 self.assertEqual(re.search(r"\B(b.)\B",
525 "abc bcd bc abxd", re.ASCII).group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000526 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
527 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300528 self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300529 self.assertEqual(re.search(br"\b(b.)\b",
530 b"abcd abc bcd bx").group(1), b"bx")
531 self.assertEqual(re.search(br"\B(b.)\B",
532 b"abc bcd bc abxd").group(1), b"bx")
533 self.assertEqual(re.search(br"\b(b.)\b",
534 b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
535 self.assertEqual(re.search(br"\B(b.)\B",
536 b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
537 self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
538 self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300539 self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000540 self.assertEqual(re.search(r"\d\D\w\W\s\S",
541 "1aa! a").group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300542 self.assertEqual(re.search(br"\d\D\w\W\s\S",
543 b"1aa! a").group(0), b"1aa! a")
544 self.assertEqual(re.search(r"\d\D\w\W\s\S",
545 "1aa! a", re.ASCII).group(0), "1aa! a")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300546 self.assertEqual(re.search(br"\d\D\w\W\s\S",
547 b"1aa! a", re.LOCALE).group(0), b"1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000548
Serhiy Storchakab99c1322014-11-10 14:38:16 +0200549 def test_other_escapes(self):
550 self.assertRaises(re.error, re.compile, "\\")
551 self.assertEqual(re.match(r"\(", '(').group(), '(')
552 self.assertIsNone(re.match(r"\(", ')'))
553 self.assertEqual(re.match(r"\\", '\\').group(), '\\')
554 self.assertEqual(re.match(r"\y", 'y').group(), 'y')
555 self.assertIsNone(re.match(r"\y", 'z'))
556 self.assertEqual(re.match(r"[\]]", ']').group(), ']')
557 self.assertIsNone(re.match(r"[\]]", '['))
558 self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
559 self.assertIsNone(re.match(r"[a\-c]", 'b'))
560 self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
561 self.assertIsNone(re.match(r"[\^a]+", 'b'))
562
Ezio Melotti5a045b92012-02-29 11:48:44 +0200563 def test_string_boundaries(self):
564 # See http://bugs.python.org/issue10713
565 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
566 "abc")
567 # There's a word boundary at the start of a string.
568 self.assertTrue(re.match(r"\b", "abc"))
569 # A non-empty string includes a non-boundary zero-length match.
570 self.assertTrue(re.search(r"\B", "abc"))
571 # There is no non-boundary match at the start of a string.
572 self.assertFalse(re.match(r"\B", "abc"))
573 # However, an empty string contains no word boundaries, and also no
574 # non-boundaries.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300575 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200576 # This one is questionable and different from the perlre behaviour,
577 # but describes current behavior.
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300578 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti5a045b92012-02-29 11:48:44 +0200579 # A single word-character string has two boundaries, but no
580 # non-boundary gaps.
581 self.assertEqual(len(re.findall(r"\b", "a")), 2)
582 self.assertEqual(len(re.findall(r"\B", "a")), 0)
583 # If there are no words, there are no boundaries
584 self.assertEqual(len(re.findall(r"\b", " ")), 0)
585 self.assertEqual(len(re.findall(r"\b", " ")), 0)
586 # Can match around the whitespace.
587 self.assertEqual(len(re.findall(r"\B", " ")), 2)
588
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000589 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000590 self.assertEqual(re.match("([\u2222\u2223])",
591 "\u2222").group(1), "\u2222")
Serhiy Storchakabe80fc92013-10-24 22:02:58 +0300592 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300593 self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000594
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100595 def test_big_codesize(self):
596 # Issue #1160
597 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300598 self.assertTrue(r.match('1000'))
599 self.assertTrue(r.match('9999'))
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100600
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000601 def test_anyall(self):
602 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
603 "a\nb")
604 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
605 "a\n\nb")
606
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200607 def test_lookahead(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000608 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
609 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
610 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
611 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
612 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
613 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
614 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
615
616 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
617 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
618 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
619 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
620
Serhiy Storchaka4eea62f2015-02-21 10:07:35 +0200621 # Group reference.
622 self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
623 self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
624 # Conditional group reference.
625 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
626 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
627 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
628 self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
629 self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
630 # Group used before defined.
631 self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
632 self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
633 self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
634
635 def test_lookbehind(self):
636 self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
637 self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
638 self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
639 self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
640 # Group reference.
641 self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
642 self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
643 self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
644 self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
645 # Conditional group reference.
646 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
647 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
648 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
649 self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
650 self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
651 # Group used before defined.
652 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
653 self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
654 self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
655 # Group defined in the same lookbehind pattern
656 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
657 self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
658 self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
659 self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
660
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000661 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000662 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300663 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000664 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
665 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
666 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
667 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
668 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
669 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
670 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
671 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
672
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200673 assert '\u212a'.lower() == 'k' # 'K'
674 self.assertTrue(re.match(r'K', '\u212a', re.I))
675 self.assertTrue(re.match(r'k', '\u212a', re.I))
676 self.assertTrue(re.match(r'\u212a', 'K', re.I))
677 self.assertTrue(re.match(r'\u212a', 'k', re.I))
678 assert '\u017f'.upper() == 'S' # 'ſ'
679 self.assertTrue(re.match(r'S', '\u017f', re.I))
680 self.assertTrue(re.match(r's', '\u017f', re.I))
681 self.assertTrue(re.match(r'\u017f', 'S', re.I))
682 self.assertTrue(re.match(r'\u017f', 's', re.I))
683 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
684 self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
685 self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
686
687 def test_ignore_case_set(self):
688 self.assertTrue(re.match(r'[19A]', 'A', re.I))
689 self.assertTrue(re.match(r'[19a]', 'a', re.I))
690 self.assertTrue(re.match(r'[19a]', 'A', re.I))
691 self.assertTrue(re.match(r'[19A]', 'a', re.I))
692 self.assertTrue(re.match(br'[19A]', b'A', re.I))
693 self.assertTrue(re.match(br'[19a]', b'a', re.I))
694 self.assertTrue(re.match(br'[19a]', b'A', re.I))
695 self.assertTrue(re.match(br'[19A]', b'a', re.I))
696 assert '\u212a'.lower() == 'k' # 'K'
697 self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
698 self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
699 self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
700 self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
701 assert '\u017f'.upper() == 'S' # 'ſ'
702 self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
703 self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
704 self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
705 self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
706 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
707 self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
708 self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
709
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200710 def test_ignore_case_range(self):
711 # Issues #3511, #17381.
712 self.assertTrue(re.match(r'[9-a]', '_', re.I))
713 self.assertIsNone(re.match(r'[9-A]', '_', re.I))
714 self.assertTrue(re.match(br'[9-a]', b'_', re.I))
715 self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
716 self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
717 self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
718 self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))
719 self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
720 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
721 self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
722 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
723 self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
724 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
725 self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
726 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
727 self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
728
Serhiy Storchaka0c938f62014-11-10 12:37:16 +0200729 assert '\u212a'.lower() == 'k' # 'K'
730 self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
731 self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
732 self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
733 self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
734 assert '\u017f'.upper() == 'S' # 'ſ'
735 self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
736 self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
737 self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
738 self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
739 assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
740 self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
741 self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
742
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000743 def test_category(self):
744 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
745
746 def test_getlower(self):
747 import _sre
748 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
749 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
750 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200751 self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000752
753 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Serhiy Storchakaa25875c2014-09-14 15:56:27 +0300754 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200755 self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
756 self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000757
758 def test_not_literal(self):
759 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
760 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
761
762 def test_search_coverage(self):
763 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
764 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
765
Ezio Melottid2114eb2011-03-25 14:08:44 +0200766 def assertMatch(self, pattern, text, match=None, span=None,
767 matcher=re.match):
768 if match is None and span is None:
769 # the pattern matches the whole text
770 match = text
771 span = (0, len(text))
772 elif match is None or span is None:
773 raise ValueError('If match is not None, span should be specified '
774 '(and vice versa).')
775 m = matcher(pattern, text)
776 self.assertTrue(m)
777 self.assertEqual(m.group(), match)
778 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000779
Ezio Melottid2114eb2011-03-25 14:08:44 +0200780 def test_re_escape(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300781 alnum_chars = string.ascii_letters + string.digits + '_'
Ezio Melottid2114eb2011-03-25 14:08:44 +0200782 p = ''.join(chr(i) for i in range(256))
783 for c in p:
784 if c in alnum_chars:
785 self.assertEqual(re.escape(c), c)
786 elif c == '\x00':
787 self.assertEqual(re.escape(c), '\\000')
788 else:
789 self.assertEqual(re.escape(c), '\\' + c)
790 self.assertMatch(re.escape(c), c)
791 self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000792
Guido van Rossum698280d2008-09-10 17:44:35 +0000793 def test_re_escape_byte(self):
Ezio Melotti88fdeb42011-04-10 12:59:16 +0300794 alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
Ezio Melottid2114eb2011-03-25 14:08:44 +0200795 p = bytes(range(256))
796 for i in p:
Guido van Rossum698280d2008-09-10 17:44:35 +0000797 b = bytes([i])
Ezio Melottid2114eb2011-03-25 14:08:44 +0200798 if b in alnum_chars:
799 self.assertEqual(re.escape(b), b)
800 elif i == 0:
801 self.assertEqual(re.escape(b), b'\\000')
802 else:
803 self.assertEqual(re.escape(b), b'\\' + b)
804 self.assertMatch(re.escape(b), b)
805 self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000806
Ezio Melotti7b9e97b2011-03-25 14:09:33 +0200807 def test_re_escape_non_ascii(self):
808 s = 'xxx\u2620\u2620\u2620xxx'
809 s_escaped = re.escape(s)
810 self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
811 self.assertMatch(s_escaped, s)
812 self.assertMatch('.%s+.' % re.escape('\u2620'), s,
813 'x\u2620\u2620\u2620x', (2, 7), re.search)
814
815 def test_re_escape_non_ascii_bytes(self):
816 b = 'y\u2620y\u2620y'.encode('utf-8')
817 b_escaped = re.escape(b)
818 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
819 self.assertMatch(b_escaped, b)
820 res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
821 self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000822
Serhiy Storchakab85a9762014-09-15 11:33:19 +0300823 def test_pickling(self):
824 import pickle
825 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
826 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
827 pickled = pickle.dumps(oldpat, proto)
828 newpat = pickle.loads(pickled)
829 self.assertEqual(newpat, oldpat)
830 # current pickle expects the _compile() reconstructor in re module
831 from re import _compile
Guido van Rossum23b22571997-07-17 22:36:14 +0000832
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000833 def test_constants(self):
834 self.assertEqual(re.I, re.IGNORECASE)
835 self.assertEqual(re.L, re.LOCALE)
836 self.assertEqual(re.M, re.MULTILINE)
837 self.assertEqual(re.S, re.DOTALL)
838 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000839
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000840 def test_flags(self):
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200841 for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300842 self.assertTrue(re.compile('^pattern$', flag))
Serhiy Storchaka22a309a2014-12-01 11:50:07 +0200843 for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
844 self.assertTrue(re.compile(b'^pattern$', flag))
Guido van Rossumf473cb01998-01-14 16:42:17 +0000845
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000846 def test_sre_character_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200847 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
848 if i < 256:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300849 self.assertTrue(re.match(r"\%03o" % i, chr(i)))
850 self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
851 self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
852 self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
853 self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
854 self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200855 if i < 0x10000:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300856 self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
857 self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
858 self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
859 self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
860 self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
861 self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
862 self.assertTrue(re.match(r"\0", "\000"))
863 self.assertTrue(re.match(r"\08", "\0008"))
864 self.assertTrue(re.match(r"\01", "\001"))
865 self.assertTrue(re.match(r"\018", "\0018"))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300866 self.assertRaises(re.error, re.match, r"\567", "")
Antoine Pitrou463badf2012-06-23 13:29:19 +0200867 self.assertRaises(re.error, re.match, r"\911", "")
868 self.assertRaises(re.error, re.match, r"\x1", "")
869 self.assertRaises(re.error, re.match, r"\x1z", "")
870 self.assertRaises(re.error, re.match, r"\u123", "")
871 self.assertRaises(re.error, re.match, r"\u123z", "")
872 self.assertRaises(re.error, re.match, r"\U0001234", "")
873 self.assertRaises(re.error, re.match, r"\U0001234z", "")
874 self.assertRaises(re.error, re.match, r"\U00110000", "")
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000875
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000876 def test_sre_character_class_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200877 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
878 if i < 256:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300879 self.assertTrue(re.match(r"[\%o]" % i, chr(i)))
880 self.assertTrue(re.match(r"[\%o8]" % i, chr(i)))
881 self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
882 self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
883 self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
884 self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
885 self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
886 self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200887 if i < 0x10000:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300888 self.assertTrue(re.match(r"[\u%04x]" % i, chr(i)))
889 self.assertTrue(re.match(r"[\u%04x0]" % i, chr(i)))
890 self.assertTrue(re.match(r"[\u%04xz]" % i, chr(i)))
891 self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
892 self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
893 self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300894 self.assertRaises(re.error, re.match, r"[\567]", "")
Antoine Pitrou463badf2012-06-23 13:29:19 +0200895 self.assertRaises(re.error, re.match, r"[\911]", "")
896 self.assertRaises(re.error, re.match, r"[\x1z]", "")
897 self.assertRaises(re.error, re.match, r"[\u123z]", "")
898 self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
899 self.assertRaises(re.error, re.match, r"[\U00110000]", "")
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300900 self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
Antoine Pitrou463badf2012-06-23 13:29:19 +0200901
902 def test_sre_byte_literals(self):
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000903 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300904 self.assertTrue(re.match((r"\%03o" % i).encode(), bytes([i])))
905 self.assertTrue(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
906 self.assertTrue(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
907 self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
908 self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
909 self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
910 self.assertTrue(re.match(br"\u", b'u'))
911 self.assertTrue(re.match(br"\U", b'U'))
912 self.assertTrue(re.match(br"\0", b"\000"))
913 self.assertTrue(re.match(br"\08", b"\0008"))
914 self.assertTrue(re.match(br"\01", b"\001"))
915 self.assertTrue(re.match(br"\018", b"\0018"))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300916 self.assertRaises(re.error, re.match, br"\567", b"")
Antoine Pitrou463badf2012-06-23 13:29:19 +0200917 self.assertRaises(re.error, re.match, br"\911", b"")
918 self.assertRaises(re.error, re.match, br"\x1", b"")
919 self.assertRaises(re.error, re.match, br"\x1z", b"")
920
921 def test_sre_byte_class_literals(self):
922 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300923 self.assertTrue(re.match((r"[\%o]" % i).encode(), bytes([i])))
924 self.assertTrue(re.match((r"[\%o8]" % i).encode(), bytes([i])))
925 self.assertTrue(re.match((r"[\%03o]" % i).encode(), bytes([i])))
926 self.assertTrue(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
927 self.assertTrue(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
928 self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
929 self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
930 self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
931 self.assertTrue(re.match(br"[\u]", b'u'))
932 self.assertTrue(re.match(br"[\U]", b'U'))
Serhiy Storchakac563caf2014-09-23 23:22:41 +0300933 self.assertRaises(re.error, re.match, br"[\567]", b"")
Serhiy Storchakacd9032d2014-09-23 23:04:21 +0300934 self.assertRaises(re.error, re.match, br"[\911]", b"")
935 self.assertRaises(re.error, re.match, br"[\x1z]", b"")
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000936
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000937 def test_bug_113254(self):
938 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
939 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
940 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
941
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000942 def test_bug_527371(self):
943 # bug described in patches 527371/672491
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +0300944 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000945 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
946 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
947 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
948 self.assertEqual(re.match("((a))", "a").lastindex, 1)
949
950 def test_bug_545855(self):
951 # bug 545855 -- This pattern failed to cause a compile error as it
952 # should, instead provoking a TypeError.
953 self.assertRaises(re.error, re.compile, 'foo[a-')
954
955 def test_bug_418626(self):
956 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
957 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
958 # pattern '*?' on a long string.
959 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
960 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
961 20003)
962 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000963 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000964 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000965 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000966
967 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000968 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000969 self.assertEqual(re.compile(pat) and 1, 1)
970
Skip Montanaro1e703c62003-04-25 15:40:28 +0000971 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000972 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000973 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000974 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
975 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
976 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000977
Serhiy Storchakafa468162013-02-16 21:23:53 +0200978 def test_unlimited_zero_width_repeat(self):
979 # Issue #9669
980 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
981 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
982 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
983 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
984 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
985 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
986
Skip Montanaro1e703c62003-04-25 15:40:28 +0000987 def test_scanner(self):
988 def s_ident(scanner, token): return token
989 def s_operator(scanner, token): return "op%s" % token
990 def s_float(scanner, token): return float(token)
991 def s_int(scanner, token): return int(token)
992
993 scanner = Scanner([
994 (r"[a-zA-Z_]\w*", s_ident),
995 (r"\d+\.\d*", s_float),
996 (r"\d+", s_int),
997 (r"=|\+|-|\*|/", s_operator),
998 (r"\s+", None),
999 ])
1000
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001001 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +00001002
Skip Montanaro1e703c62003-04-25 15:40:28 +00001003 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
1004 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
1005 'op+', 'bar'], ''))
1006
Skip Montanaro5ba00542003-04-25 16:00:14 +00001007 def test_bug_448951(self):
1008 # bug 448951 (similar to 429357, but with single char match)
1009 # (Also test greedy matches.)
1010 for op in '','?','*':
1011 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
1012 (None, None))
1013 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
1014 ('a:', 'a'))
1015
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001016 def test_bug_725106(self):
1017 # capturing groups in alternatives in repeats
1018 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
1019 ('b', 'a'))
1020 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
1021 ('c', 'b'))
1022 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
1023 ('b', None))
1024 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
1025 ('b', None))
1026 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
1027 ('b', 'a'))
1028 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
1029 ('c', 'b'))
1030 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
1031 ('b', None))
1032 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
1033 ('b', None))
1034
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +00001035 def test_bug_725149(self):
1036 # mark_stack_base restoring before restoring marks
1037 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
1038 ('a', None))
1039 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
1040 ('a', None, None))
1041
Just van Rossum12723ba2003-07-02 20:03:04 +00001042 def test_bug_764548(self):
1043 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001044 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +00001045 pat = re.compile(my_unicode("abc"))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001046 self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +00001047
Skip Montanaro5ba00542003-04-25 16:00:14 +00001048 def test_finditer(self):
1049 iter = re.finditer(r":+", "a:b::c:::d")
1050 self.assertEqual([item.group(0) for item in iter],
1051 [":", "::", ":::"])
1052
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001053 pat = re.compile(r":+")
1054 iter = pat.finditer("a:b::c:::d", 1, 10)
1055 self.assertEqual([item.group(0) for item in iter],
1056 [":", "::", ":::"])
1057
1058 pat = re.compile(r":+")
1059 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
1060 self.assertEqual([item.group(0) for item in iter],
1061 [":", "::", ":::"])
1062
1063 pat = re.compile(r":+")
1064 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
1065 self.assertEqual([item.group(0) for item in iter],
1066 [":", "::", ":::"])
1067
1068 pat = re.compile(r":+")
1069 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
1070 self.assertEqual([item.group(0) for item in iter],
1071 ["::", "::"])
1072
Thomas Wouters40a088d2008-03-18 20:19:54 +00001073 def test_bug_926075(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001074 self.assertIsNot(re.compile('bug_926075'),
1075 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +00001076
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001077 def test_bug_931848(self):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001078 pattern = "[\u002E\u3002\uFF0E\uFF61]"
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +00001079 self.assertEqual(re.compile(pattern).split("a.b.c"),
1080 ['a','b','c'])
1081
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001082 def test_bug_581080(self):
1083 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +00001084 self.assertEqual(next(iter).span(), (1,2))
1085 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001086
1087 scanner = re.compile(r"\s").scanner("a b")
1088 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001089 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001090
1091 def test_bug_817234(self):
1092 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +00001093 self.assertEqual(next(iter).span(), (0, 4))
1094 self.assertEqual(next(iter).span(), (4, 4))
1095 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001096
Mark Dickinson1f268282009-07-28 17:22:36 +00001097 def test_bug_6561(self):
1098 # '\d' should match characters in Unicode category 'Nd'
1099 # (Number, Decimal Digit), but not those in 'Nl' (Number,
1100 # Letter) or 'No' (Number, Other).
1101 decimal_digits = [
1102 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
1103 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
1104 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
1105 ]
1106 for x in decimal_digits:
1107 self.assertEqual(re.match('^\d$', x).group(0), x)
1108
1109 not_decimal_digits = [
1110 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
1111 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
1112 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
1113 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
1114 ]
1115 for x in not_decimal_digits:
1116 self.assertIsNone(re.match('^\d$', x))
1117
Guido van Rossumd8faa362007-04-27 19:54:29 +00001118 def test_empty_array(self):
1119 # SF buf 1647541
1120 import array
Guido van Rossum166746c2007-07-03 15:39:16 +00001121 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +00001122 a = array.array(typecode)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001123 self.assertIsNone(re.compile(b"bla").match(a))
Antoine Pitroufd036452008-08-19 17:56:33 +00001124 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00001125
Christian Heimes072c0f12008-01-03 23:01:04 +00001126 def test_inline_flags(self):
1127 # Bug #1700
Serhiy Storchakaab140882014-11-11 21:13:28 +02001128 upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
1129 lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
Christian Heimes072c0f12008-01-03 23:01:04 +00001130
1131 p = re.compile(upper_char, re.I | re.U)
1132 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001133 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001134
1135 p = re.compile(lower_char, re.I | re.U)
1136 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001137 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001138
1139 p = re.compile('(?i)' + upper_char, re.U)
1140 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001141 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001142
1143 p = re.compile('(?i)' + lower_char, re.U)
1144 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001145 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001146
1147 p = re.compile('(?iu)' + upper_char)
1148 q = p.match(lower_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001149 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001150
1151 p = re.compile('(?iu)' + lower_char)
1152 q = p.match(upper_char)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001153 self.assertTrue(q)
Christian Heimes072c0f12008-01-03 23:01:04 +00001154
Christian Heimes25bb7832008-01-11 16:17:00 +00001155 def test_dollar_matches_twice(self):
1156 "$ matches the end of string, and just before the terminating \n"
1157 pattern = re.compile('$')
1158 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
1159 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
1160 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1161
1162 pattern = re.compile('$', re.MULTILINE)
1163 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
1164 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
1165 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
1166
Antoine Pitroufd036452008-08-19 17:56:33 +00001167 def test_bytes_str_mixing(self):
1168 # Mixing str and bytes is disallowed
1169 pat = re.compile('.')
1170 bpat = re.compile(b'.')
1171 self.assertRaises(TypeError, pat.match, b'b')
1172 self.assertRaises(TypeError, bpat.match, 'b')
1173 self.assertRaises(TypeError, pat.sub, b'b', 'c')
1174 self.assertRaises(TypeError, pat.sub, 'b', b'c')
1175 self.assertRaises(TypeError, pat.sub, b'b', b'c')
1176 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
1177 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
1178 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
1179
1180 def test_ascii_and_unicode_flag(self):
1181 # String patterns
1182 for flags in (0, re.UNICODE):
1183 pat = re.compile('\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001184 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001185 pat = re.compile('\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001186 self.assertTrue(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001187 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001188 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001189 pat = re.compile('(?a)\xc0', re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001190 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001191 pat = re.compile('\w', re.ASCII)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001192 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001193 pat = re.compile('(?a)\w')
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001194 self.assertIsNone(pat.match('\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001195 # Bytes patterns
1196 for flags in (0, re.ASCII):
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001197 pat = re.compile(b'\xc0', flags | re.IGNORECASE)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001198 self.assertIsNone(pat.match(b'\xe0'))
Serhiy Storchakaa25875c2014-09-14 15:56:27 +03001199 pat = re.compile(b'\w', flags)
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001200 self.assertIsNone(pat.match(b'\xe0'))
Antoine Pitroufd036452008-08-19 17:56:33 +00001201 # Incompatibilities
1202 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
1203 self.assertRaises(ValueError, re.compile, b'(?u)\w')
1204 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
1205 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
1206 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
1207 self.assertRaises(ValueError, re.compile, '(?au)\w')
1208
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001209 def test_locale_flag(self):
1210 import locale
1211 _, enc = locale.getlocale(locale.LC_CTYPE)
1212 # Search non-ASCII letter
1213 for i in range(128, 256):
1214 try:
1215 c = bytes([i]).decode(enc)
1216 sletter = c.lower()
1217 if sletter == c: continue
1218 bletter = sletter.encode(enc)
1219 if len(bletter) != 1: continue
1220 if bletter.decode(enc) != sletter: continue
1221 bpat = re.escape(bytes([i]))
1222 break
1223 except (UnicodeError, TypeError):
1224 pass
1225 else:
1226 bletter = None
1227 bpat = b'A'
1228 # Bytes patterns
1229 pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
1230 if bletter:
1231 self.assertTrue(pat.match(bletter))
1232 pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
1233 if bletter:
1234 self.assertTrue(pat.match(bletter))
1235 pat = re.compile(bpat, re.IGNORECASE)
1236 if bletter:
1237 self.assertIsNone(pat.match(bletter))
1238 pat = re.compile(b'\w', re.LOCALE)
1239 if bletter:
1240 self.assertTrue(pat.match(bletter))
1241 pat = re.compile(b'(?L)\w')
1242 if bletter:
1243 self.assertTrue(pat.match(bletter))
1244 pat = re.compile(b'\w')
1245 if bletter:
1246 self.assertIsNone(pat.match(bletter))
1247 # Incompatibilities
1248 self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
1249 self.assertWarns(DeprecationWarning, re.compile, '(?L)')
1250 self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
1251 self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
1252 self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
1253 self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
1254
Ezio Melottib92ed7c2010-03-06 15:24:08 +00001255 def test_bug_6509(self):
1256 # Replacement strings of both types must parse properly.
1257 # all strings
1258 pat = re.compile('a(\w)')
1259 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
1260 pat = re.compile('a(.)')
1261 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
1262 pat = re.compile('..')
1263 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
1264
1265 # all bytes
1266 pat = re.compile(b'a(\w)')
1267 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
1268 pat = re.compile(b'a(.)')
1269 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
1270 pat = re.compile(b'..')
1271 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
1272
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001273 def test_dealloc(self):
1274 # issue 3299: check for segfault in debug build
1275 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +00001276 # the overflow limit is different on wide and narrow builds and it
1277 # depends on the definition of SRE_CODE (see sre.h).
1278 # 2**128 should be big enough to overflow on both. For smaller values
1279 # a RuntimeError is raised instead of OverflowError.
1280 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001281 self.assertRaises(TypeError, re.finditer, "a", {})
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001282 with self.assertRaises(OverflowError):
1283 _sre.compile("abc", 0, [long_overflow], 0, [], [])
1284 with self.assertRaises(TypeError):
1285 _sre.compile({}, 0, [], 0, [], [])
Christian Heimes072c0f12008-01-03 23:01:04 +00001286
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001287 def test_search_dot_unicode(self):
Serhiy Storchakad9cf65f2014-09-14 16:20:20 +03001288 self.assertTrue(re.search("123.*-", '123abc-'))
1289 self.assertTrue(re.search("123.*-", '123\xe9-'))
1290 self.assertTrue(re.search("123.*-", '123\u20ac-'))
1291 self.assertTrue(re.search("123.*-", '123\U0010ffff-'))
1292 self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001293
Ezio Melottidf723e12012-03-13 01:29:48 +02001294 def test_compile(self):
1295 # Test return value when given string and pattern as parameter
1296 pattern = re.compile('random pattern')
1297 self.assertIsInstance(pattern, re._pattern_type)
1298 same_pattern = re.compile(pattern)
1299 self.assertIsInstance(same_pattern, re._pattern_type)
1300 self.assertIs(same_pattern, pattern)
1301 # Test behaviour when not given a string or pattern as parameter
1302 self.assertRaises(TypeError, re.compile, 0)
1303
Ezio Melottife8e6e72013-01-11 08:32:01 +02001304 def test_bug_13899(self):
1305 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
1306 # nothing. Ditto B and Z.
1307 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
1308 ['A', 'B', '\b', 'C', 'Z'])
1309
Antoine Pitroub33941a2012-12-03 20:55:56 +01001310 @bigmemtest(size=_2G, memuse=1)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001311 def test_large_search(self, size):
1312 # Issue #10182: indices were 32-bit-truncated.
1313 s = 'a' * size
1314 m = re.search('$', s)
1315 self.assertIsNotNone(m)
Antoine Pitrou86067c22012-12-03 21:08:43 +01001316 self.assertEqual(m.start(), size)
1317 self.assertEqual(m.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001318
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001319 # The huge memuse is because of re.sub() using a list and a join()
1320 # to create the replacement result.
Antoine Pitroub33941a2012-12-03 20:55:56 +01001321 @bigmemtest(size=_2G, memuse=16 + 2)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +01001322 def test_large_subn(self, size):
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001323 # Issue #10182: indices were 32-bit-truncated.
1324 s = 'a' * size
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001325 r, n = re.subn('', '', s)
1326 self.assertEqual(r, s)
1327 self.assertEqual(n, size + 1)
1328
Serhiy Storchakac1b59d42012-12-29 23:38:48 +02001329 def test_bug_16688(self):
1330 # Issue 16688: Backreferences make case-insensitive regex fail on
1331 # non-ASCII strings.
1332 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1333 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001334
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001335 def test_repeat_minmax_overflow(self):
1336 # Issue #13169
1337 string = "x" * 100000
1338 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1339 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1340 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1341 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1342 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1343 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1344 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1345 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1346 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1347 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1348 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1349
1350 @cpython_only
1351 def test_repeat_minmax_overflow_maxrepeat(self):
1352 try:
1353 from _sre import MAXREPEAT
1354 except ImportError:
1355 self.skipTest('requires _sre.MAXREPEAT constant')
1356 string = "x" * 100000
1357 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1358 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1359 (0, 100000))
1360 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1361 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1362 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1363 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1364
R David Murray26dfaac92013-04-14 13:00:54 -04001365 def test_backref_group_name_in_exception(self):
1366 # Issue 17341: Poor error message when compiling invalid regex
1367 with self.assertRaisesRegex(sre_constants.error, '<foo>'):
1368 re.compile('(?P=<foo>)')
1369
1370 def test_group_name_in_exception(self):
1371 # Issue 17341: Poor error message when compiling invalid regex
1372 with self.assertRaisesRegex(sre_constants.error, '\?foo'):
1373 re.compile('(?P<?foo>)')
1374
Serhiy Storchaka1f35ae02013-08-03 19:18:38 +03001375 def test_issue17998(self):
1376 for reps in '*', '+', '?', '{1}':
1377 for mod in '', '?':
1378 pattern = '.' + reps + mod + 'yz'
1379 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1380 ['xyz'], msg=pattern)
1381 pattern = pattern.encode()
1382 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1383 [b'xyz'], msg=pattern)
1384
def test_match_repr(self):
    # repr() of a match object shows its type, span and matched text.
    # The loop variable is called ``subject`` so it does not shadow the
    # ``string`` module imported at the top of the file.
    for subject in '[abracadabra]', S('[abracadabra]'):
        m = re.search(r'(.+)(.*?)\1', subject)
        self.assertEqual(repr(m), "<%s.%s object; "
                                  "span=(1, 12), match='abracadabra'>" %
                                  (type(m).__module__, type(m).__qualname__))
    for subject in (b'[abracadabra]', B(b'[abracadabra]'),
                    bytearray(b'[abracadabra]'),
                    memoryview(b'[abracadabra]')):
        m = re.search(rb'(.+)(.*?)\1', subject)
        self.assertEqual(repr(m), "<%s.%s object; "
                                  "span=(1, 12), match=b'abracadabra'>" %
                                  (type(m).__module__, type(m).__qualname__))

    # Each expected repr is built from the type of the very match object
    # being checked.
    first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
    self.assertEqual(repr(first), "<%s.%s object; "
                                  "span=(0, 2), match='aa'>" %
                                  (type(first).__module__, type(first).__qualname__))
    self.assertEqual(repr(second), "<%s.%s object; "
                                   "span=(3, 5), match='bb'>" %
                                   (type(second).__module__, type(second).__qualname__))
1406
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001407
Serhiy Storchaka98985a12013-08-19 23:18:23 +03001408 def test_bug_2537(self):
1409 # issue 2537: empty submatches
1410 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1411 for inner_op in ('{0,}', '*', '?'):
1412 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1413 m = r.match("xyyzy")
1414 self.assertEqual(m.group(0), "xyy")
1415 self.assertEqual(m.group(1), "")
1416 self.assertEqual(m.group(2), "y")
1417
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001418 def test_debug_flag(self):
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001419 pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001420 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001421 re.compile(pat, re.DEBUG)
1422 dump = '''\
Serhiy Storchakac7f7d382014-11-09 20:48:36 +02001423SUBPATTERN 1
1424 LITERAL 46
1425SUBPATTERN None
1426 BRANCH
1427 IN
1428 LITERAL 99
1429 LITERAL 104
1430 OR
1431 LITERAL 112
1432 LITERAL 121
1433SUBPATTERN None
1434 GROUPREF_EXISTS 1
1435 AT AT_END
1436 ELSE
1437 LITERAL 58
1438 LITERAL 32
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001439'''
1440 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001441 # Debug output is output again even a second time (bypassing
1442 # the cache -- issue #20426).
1443 with captured_stdout() as out:
Serhiy Storchaka44dae8b2014-09-21 22:47:55 +03001444 re.compile(pat, re.DEBUG)
1445 self.assertEqual(out.getvalue(), dump)
Antoine Pitroud2cc7432014-02-03 20:59:59 +01001446
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001447 def test_keyword_parameters(self):
1448 # Issue #20283: Accepting the string keyword parameter.
1449 pat = re.compile(r'(ab)')
1450 self.assertEqual(
1451 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
1452 self.assertEqual(
Serhiy Storchakaa537eb42014-03-06 11:36:15 +02001453 pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
1454 self.assertEqual(
Serhiy Storchakaccdf3522014-03-06 11:28:32 +02001455 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
1456 self.assertEqual(
1457 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
1458 self.assertEqual(
1459 pat.split(string='abracadabra', maxsplit=1),
1460 ['', 'ab', 'racadabra'])
1461 self.assertEqual(
1462 pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
1463 (7, 9))
1464
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03001465 def test_bug_20998(self):
1466 # Issue #20998: Fullmatch of repeated single character pattern
1467 # with ignore case.
1468 self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
1469
def test_locale_caching(self):
    # Issue #22410
    saved = locale.setlocale(locale.LC_CTYPE)
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved)

    # Both locales must be available, otherwise the test is skipped.
    for loc in ('en_US.iso88591', 'en_US.utf8'):
        try:
            locale.setlocale(locale.LC_CTYPE, loc)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % loc)

    # Run the per-locale checks in both orders, starting each round
    # from an empty pattern cache.
    rounds = (
        (self.check_en_US_iso88591, self.check_en_US_utf8),
        (self.check_en_US_utf8, self.check_en_US_iso88591),
    )
    for checks in rounds:
        re.purge()
        for check in checks:
            check()
1487
def check_en_US_iso88591(self):
    # Switches LC_CTYPE to en_US.iso88591, where bytes 0xC5 and 0xE5
    # are expected to match each other case-insensitively.
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    pairs = ((b'\xc5\xe5', b'\xc5\xe5'), (b'\xc5', b'\xe5'), (b'\xe5', b'\xc5'))
    # Flags passed as an argument ...
    for pattern, subject in pairs:
        self.assertTrue(re.match(pattern, subject, re.L|re.I))
    # ... and the same flags given inline.
    for pattern, subject in pairs:
        self.assertTrue(re.match(b'(?Li)' + pattern, subject))
1496
def check_en_US_utf8(self):
    # Switches LC_CTYPE to en_US.utf8, where bytes 0xC5 and 0xE5 are
    # expected to match only themselves.
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    same = (b'\xc5\xe5', b'\xc5\xe5')
    crossed = ((b'\xc5', b'\xe5'), (b'\xe5', b'\xc5'))

    # Flags passed as an argument.
    self.assertTrue(re.match(same[0], same[1], re.L|re.I))
    for pattern, subject in crossed:
        self.assertIsNone(re.match(pattern, subject, re.L|re.I))

    # The same flags given inline.
    self.assertTrue(re.match(b'(?Li)' + same[0], same[1]))
    for pattern, subject in crossed:
        self.assertIsNone(re.match(b'(?Li)' + pattern, subject))
1505
Serhiy Storchakaad446d52014-11-10 13:49:00 +02001506 def test_error(self):
1507 with self.assertRaises(re.error) as cm:
1508 re.compile('(\u20ac))')
1509 err = cm.exception
1510 self.assertIsInstance(err.pattern, str)
1511 self.assertEqual(err.pattern, '(\u20ac))')
1512 self.assertEqual(err.pos, 3)
1513 self.assertEqual(err.lineno, 1)
1514 self.assertEqual(err.colno, 4)
1515 self.assertIn(err.msg, str(err))
1516 self.assertIn(' at position 3', str(err))
1517 self.assertNotIn(' at position 3', err.msg)
1518 # Bytes pattern
1519 with self.assertRaises(re.error) as cm:
1520 re.compile(b'(\xa4))')
1521 err = cm.exception
1522 self.assertIsInstance(err.pattern, bytes)
1523 self.assertEqual(err.pattern, b'(\xa4))')
1524 self.assertEqual(err.pos, 3)
1525 # Multiline pattern
1526 with self.assertRaises(re.error) as cm:
1527 re.compile("""
1528 (
1529 abc
1530 )
1531 )
1532 (
1533 """, re.VERBOSE)
1534 err = cm.exception
1535 self.assertEqual(err.pos, 77)
1536 self.assertEqual(err.lineno, 5)
1537 self.assertEqual(err.colno, 17)
1538 self.assertIn(err.msg, str(err))
1539 self.assertIn(' at position 77', str(err))
1540 self.assertIn('(line 5, column 17)', str(err))
1541
Antoine Pitrou79aa68d2013-10-25 21:36:10 +02001542
class PatternReprTests(unittest.TestCase):
    # Checks of repr() for compiled pattern objects.

    def check(self, pattern, expected):
        # Compile *pattern* without flags and compare its repr.
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        # Compile *pattern* with *flags* and compare its repr.
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        expected = "re.compile('random pattern')"
        self.check('random pattern', expected)

    def test_single_flag(self):
        expected = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, expected)

    def test_multiple_flags(self):
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', re.I|re.S|re.X, expected)

    def test_unicode_flag(self):
        # re.U does not appear in the repr of a str pattern.
        self.check_flags('random pattern', re.U,
                         "re.compile('random pattern')")
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL)")
        self.check_flags('random pattern', re.I|re.S|re.U, expected)

    def test_inline_flags(self):
        expected = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', expected)

    def test_unknown_flags(self):
        # Bits without a named flag are shown in hex, after named flags.
        self.check_flags('random pattern', 0x123000,
                         "re.compile('random pattern', 0x123000)")
        self.check_flags('random pattern', 0x123000|re.I,
                         "re.compile('random pattern', re.IGNORECASE|0x123000)")

    def test_bytes(self):
        self.check(b'bytes pattern',
                   "re.compile(b'bytes pattern')")
        self.check_flags(b'bytes pattern', re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_locale(self):
        expected = "re.compile(b'bytes pattern', re.LOCALE)"
        self.check_flags(b'bytes pattern', re.L, expected)

    def test_quotes(self):
        # (pattern, expected repr) for each quoting situation.
        samples = (
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        )
        for pattern, expected in samples:
            self.check(pattern, expected)

    def test_long_pattern(self):
        pattern = 'Very %spattern' % ('long ' * 1000)
        head = "re.compile('Very long long lon"

        # The repr of a very long pattern is truncated ...
        shown = repr(re.compile(pattern))
        self.assertLess(len(shown), 300)
        self.assertEqual(shown[:30], head)

        # ... and the flags still appear at the end.
        shown = repr(re.compile(pattern, re.I))
        self.assertLess(len(shown), 300)
        self.assertEqual(shown[:30], head)
        self.assertEqual(shown[-16:], ", re.IGNORECASE)")
1607
1608
class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        # (input, expected table) pairs for the overlap-table generator.
        expectations = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        build = sre_compile._generate_overlap_table
        for prefix, table in expectations:
            self.assertEqual(build(prefix), table)
1622
1623
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001624class ExternalTests(unittest.TestCase):
Guido van Rossum8e0ce301997-07-11 19:34:44 +00001625
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001626 def test_re_benchmarks(self):
1627 're_tests benchmarks'
1628 from test.re_tests import benchmarks
1629 for pattern, s in benchmarks:
1630 with self.subTest(pattern=pattern, string=s):
1631 p = re.compile(pattern)
1632 self.assertTrue(p.search(s))
1633 self.assertTrue(p.match(s))
1634 self.assertTrue(p.fullmatch(s))
1635 s2 = ' '*10000 + s + ' '*10000
1636 self.assertTrue(p.search(s2))
1637 self.assertTrue(p.match(s2, 10000))
1638 self.assertTrue(p.match(s2, 10000, 10000 + len(s)))
1639 self.assertTrue(p.fullmatch(s2, 10000, 10000 + len(s)))
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001640
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001641 def test_re_tests(self):
1642 're_tests test suite'
1643 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
1644 for t in tests:
1645 pattern = s = outcome = repl = expected = None
1646 if len(t) == 5:
1647 pattern, s, outcome, repl, expected = t
1648 elif len(t) == 3:
1649 pattern, s, outcome = t
Guido van Rossum41360a41998-03-26 19:42:58 +00001650 else:
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001651 raise ValueError('Test tuples should have 3 or 5 fields', t)
1652
1653 with self.subTest(pattern=pattern, string=s):
1654 if outcome == SYNTAX_ERROR: # Expected a syntax error
1655 with self.assertRaises(re.error):
1656 re.compile(pattern)
1657 continue
1658
1659 obj = re.compile(pattern)
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001660 result = obj.search(s)
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001661 if outcome == FAIL:
1662 self.assertIsNone(result, 'Succeeded incorrectly')
1663 continue
1664
1665 with self.subTest():
1666 self.assertTrue(result, 'Failed incorrectly')
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001667 # Matched, as expected, so now we compute the
1668 # result string and compare it to our expected result.
1669 start, end = result.span(0)
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001670 vardict = {'found': result.group(0),
1671 'groups': result.group(),
1672 'flags': result.re.flags}
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001673 for i in range(1, 100):
1674 try:
1675 gi = result.group(i)
1676 # Special hack because else the string concat fails:
1677 if gi is None:
1678 gi = "None"
1679 except IndexError:
1680 gi = "Error"
1681 vardict['g%d' % i] = gi
1682 for i in result.re.groupindex.keys():
1683 try:
1684 gi = result.group(i)
1685 if gi is None:
1686 gi = "None"
1687 except IndexError:
1688 gi = "Error"
1689 vardict[i] = gi
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001690 self.assertEqual(eval(repl, vardict), expected,
1691 'grouping error')
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001692
Antoine Pitrou22628c42008-07-22 17:53:22 +00001693 # Try the match with both pattern and string converted to
1694 # bytes, and check that it still succeeds.
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001695 try:
Antoine Pitrou22628c42008-07-22 17:53:22 +00001696 bpat = bytes(pattern, "ascii")
1697 bs = bytes(s, "ascii")
1698 except UnicodeEncodeError:
1699 # skip non-ascii tests
1700 pass
1701 else:
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001702 with self.subTest('bytes pattern match'):
Serhiy Storchaka22a309a2014-12-01 11:50:07 +02001703 obj = re.compile(bpat)
1704 self.assertTrue(obj.search(bs))
1705
1706 # Try the match with LOCALE enabled, and check that it
1707 # still succeeds.
1708 with self.subTest('locale-sensitive match'):
1709 obj = re.compile(bpat, re.LOCALE)
1710 result = obj.search(bs)
1711 if result is None:
1712 print('=== Fails on locale-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001713
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001714 # Try the match with the search area limited to the extent
1715 # of the match and see if it still succeeds. \B will
1716 # break (because it won't match at the end or start of a
1717 # string), so we'll ignore patterns that feature it.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001718 if (pattern[:2] != r'\B' and pattern[-2:] != r'\B'
1719 and result is not None):
1720 with self.subTest('range-limited match'):
1721 obj = re.compile(pattern)
1722 self.assertTrue(obj.search(s, start, end + 1))
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001723
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001724 # Try the match with IGNORECASE enabled, and check that it
1725 # still succeeds.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001726 with self.subTest('case-insensitive match'):
1727 obj = re.compile(pattern, re.IGNORECASE)
1728 self.assertTrue(obj.search(s))
Guido van Rossumdfa67901997-12-08 17:12:06 +00001729
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001730 # Try the match with UNICODE locale enabled, and check
1731 # that it still succeeds.
Serhiy Storchaka9cba9892014-12-01 11:06:45 +02001732 with self.subTest('unicode-sensitive match'):
1733 obj = re.compile(pattern, re.UNICODE)
1734 self.assertTrue(obj.search(s))
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001735
Gregory P. Smith5a631832010-07-27 05:31:29 +00001736
# Run this module's tests through unittest when executed as a script.
if __name__ == "__main__":
    unittest.main()