blob: feae7c55075c350913c588fa97d880acd50c029a [file] [log] [blame]
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
Benjamin Petersone48944b2012-03-07 14:50:25 -06002import io
Guido van Rossum8e0ce301997-07-11 19:34:44 +00003import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00004from re import Scanner
Ezio Melottid2114eb2011-03-25 14:08:44 +02005import sys
6import string
7import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
Benjamin Petersone48944b2012-03-07 14:50:25 -060020 def test_keep_buffer(self):
21 # See bug 14212
22 b = bytearray(b'x')
23 it = re.finditer(b'a', b)
24 with self.assertRaises(BufferError):
25 b.extend(b'x'*400)
26 list(it)
27 del it
28 gc_collect()
29 b.extend(b'x'*400)
30
Raymond Hettinger027bb632004-05-31 03:09:25 +000031 def test_weakref(self):
32 s = 'QabbbcR'
33 x = re.compile('ab+c')
34 y = proxy(x)
35 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
36
Skip Montanaro8ed06da2003-04-24 19:43:18 +000037 def test_search_star_plus(self):
38 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
39 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
40 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
41 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000042 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000043 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
44 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
45 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
46 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000047 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000048
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): parse the whole match as a decimal
    # integer and return that integer plus one, as a string.
    int_value = int(matchobj.group(0))
    return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000052
def test_basic_re_sub(self):
    # Basic re.sub() checks: string and callable replacements, a count
    # limit, group references and escape handling in templates.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted as-is; a string template has
    # its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Raw strings: '\g' is not a valid Python string escape.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000080
Skip Montanaro2726fcd2003-04-25 14:31:54 +000081 def test_bug_449964(self):
82 # fails for group followed by other escape
83 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
84 'xx\bxx\b')
85
86 def test_bug_449000(self):
87 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000088 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
89 'abc\ndef\n')
90 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
91 'abc\ndef\n')
92 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
93 'abc\ndef\n')
94 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
95 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000096
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000097 def test_bug_1661(self):
98 # Verify that flags do not get silently ignored with compiled patterns
99 pattern = re.compile('.')
100 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
101 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
102 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
103 self.assertRaises(ValueError, re.compile, pattern, re.I)
104
Guido van Rossum92f8f3e2008-09-10 14:30:50 +0000105 def test_bug_3629(self):
106 # A regex that triggered a bug in the sre-code validator
107 re.compile("(?P<quote>)(?(quote))")
108
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000109 def test_sub_template_numeric_escape(self):
110 # bug 776311 and friends
111 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
112 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
113 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
114 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
115 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
116 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
117 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
118
119 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
120 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
121
122 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
123 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
124 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
125 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
126 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
127
128 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
129 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000130
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000131 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
132 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
133 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
134 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
135 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
136 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
137 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
138 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
139 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
140 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
141 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
142 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
143
144 # in python2.3 (etc), these loop endlessly in sre_parser.py
145 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
146 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
147 'xz8')
148 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
149 'xza')
150
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000151 def test_qualified_re_sub(self):
152 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
153 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000154
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000155 def test_bug_114660(self):
156 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
157 'hello there')
158
159 def test_bug_462270(self):
160 # Test for empty sub() behaviour, see SF bug #462270
161 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
162 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
163
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200164 def test_symbolic_groups(self):
165 re.compile('(?P<a>x)(?P=a)(?(a)y)')
166 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
167 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
168 self.assertRaises(re.error, re.compile, '(?Px)')
169 self.assertRaises(re.error, re.compile, '(?P=)')
170 self.assertRaises(re.error, re.compile, '(?P=1)')
171 self.assertRaises(re.error, re.compile, '(?P=a)')
172 self.assertRaises(re.error, re.compile, '(?P=a1)')
173 self.assertRaises(re.error, re.compile, '(?P=a.)')
174 self.assertRaises(re.error, re.compile, '(?P<)')
175 self.assertRaises(re.error, re.compile, '(?P<>)')
176 self.assertRaises(re.error, re.compile, '(?P<1>)')
177 self.assertRaises(re.error, re.compile, '(?P<a.>)')
178 self.assertRaises(re.error, re.compile, '(?())')
179 self.assertRaises(re.error, re.compile, '(?(a))')
180 self.assertRaises(re.error, re.compile, '(?(1a))')
181 self.assertRaises(re.error, re.compile, '(?(a.))')
182
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000183 def test_symbolic_refs(self):
184 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
185 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
186 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
187 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
Ezio Melotti0941d9f2012-11-03 20:33:08 +0200188 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000189 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
190 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
191 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
192 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000193 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000194
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000195 def test_re_subn(self):
196 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
197 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
198 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
199 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
200 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000201
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000202 def test_re_split(self):
203 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
204 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
205 self.assertEqual(re.split("(:*)", ":a:b::c"),
206 ['', ':', 'a', ':', 'b', '::', 'c'])
207 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
208 self.assertEqual(re.split("(:)*", ":a:b::c"),
209 ['', ':', 'a', ':', 'b', ':', 'c'])
210 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
211 ['', ':', 'a', ':b::', 'c'])
212 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
213 ['', None, ':', 'a', None, ':', '', 'b', None, '',
214 None, '::', 'c'])
215 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
216 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000217
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000218 def test_qualified_re_split(self):
219 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
220 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
221 self.assertEqual(re.split("(:)", ":a:b::c", 2),
222 ['', ':', 'a', ':', 'b::c'])
223 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
224 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000225
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000226 def test_re_findall(self):
227 self.assertEqual(re.findall(":+", "abc"), [])
228 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
229 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
230 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
231 (":", ":"),
232 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000233
Skip Montanaro5ba00542003-04-25 16:00:14 +0000234 def test_bug_117612(self):
235 self.assertEqual(re.findall(r"(a|(b))", "aba"),
236 [("a", ""),("b", "b"),("a", "")])
237
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000238 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000239 self.assertEqual(re.match('a', 'a').groups(), ())
240 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
241 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
242 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
243 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000244
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000245 pat = re.compile('((a)|(b))(c)?')
246 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
247 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
248 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
249 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
250 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000251
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000252 # A single group
253 m = re.match('(a)', 'a')
254 self.assertEqual(m.group(0), 'a')
255 self.assertEqual(m.group(0), 'a')
256 self.assertEqual(m.group(1), 'a')
257 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000258
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000259 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
260 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
261 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
262 (None, 'b', None))
263 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000264
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000265 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000266 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
267 ('(', 'a'))
268 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
269 (None, 'a'))
270 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
271 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
272 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
273 ('a', 'b'))
274 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
275 (None, 'd'))
276 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
277 (None, 'd'))
278 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
279 ('a', ''))
280
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000281 # Tests for bug #1177831: exercise groups other than the first group
282 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
283 self.assertEqual(p.match('abc').groups(),
284 ('a', 'b', 'c'))
285 self.assertEqual(p.match('ad').groups(),
286 ('a', None, 'd'))
287 self.assertEqual(p.match('abd'), None)
288 self.assertEqual(p.match('ac'), None)
289
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000290
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000291 def test_re_groupref(self):
292 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
293 ('|', 'a'))
294 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
295 (None, 'a'))
296 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
297 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
298 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
299 ('a', 'a'))
300 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
301 (None, None))
302
303 def test_groupdict(self):
304 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
305 'first second').groupdict(),
306 {'first':'first', 'second':'second'})
307
308 def test_expand(self):
309 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
310 "first second")
311 .expand(r"\2 \1 \g<second> \g<first>"),
312 "second first second first")
313
314 def test_repeat_minmax(self):
315 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
316 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
317 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
318 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
319
320 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
321 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
322 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
323 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
324 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
325 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
326 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
327 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
328
329 self.assertEqual(re.match("^x{1}$", "xxx"), None)
330 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
331 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
332 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
333
334 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
335 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
336 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
337 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
338 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
339 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
340 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
341 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
342
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000343 self.assertEqual(re.match("^x{}$", "xxx"), None)
344 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
345
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000346 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000347 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000348 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000349 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
350 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
351 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
352 {'first': 1, 'other': 2})
353
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000354 self.assertEqual(re.match("(a)", "a").pos, 0)
355 self.assertEqual(re.match("(a)", "a").endpos, 1)
356 self.assertEqual(re.match("(a)", "a").string, "a")
357 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
358 self.assertNotEqual(re.match("(a)", "a").re, None)
359
360 def test_special_escapes(self):
361 self.assertEqual(re.search(r"\b(b.)\b",
362 "abcd abc bcd bx").group(1), "bx")
363 self.assertEqual(re.search(r"\B(b.)\B",
364 "abc bcd bc abxd").group(1), "bx")
365 self.assertEqual(re.search(r"\b(b.)\b",
366 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
367 self.assertEqual(re.search(r"\B(b.)\B",
368 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
369 self.assertEqual(re.search(r"\b(b.)\b",
370 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
371 self.assertEqual(re.search(r"\B(b.)\B",
372 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
373 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
374 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
375 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
376 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000377 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000378 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000379 "abc bcd bc abxd").group(1), "bx")
380 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
381 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
382 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000383 self.assertEqual(re.search(r"\d\D\w\W\s\S",
384 "1aa! a").group(0), "1aa! a")
385 self.assertEqual(re.search(r"\d\D\w\W\s\S",
386 "1aa! a", re.LOCALE).group(0), "1aa! a")
387 self.assertEqual(re.search(r"\d\D\w\W\s\S",
388 "1aa! a", re.UNICODE).group(0), "1aa! a")
389
Ezio Melotti5a045b92012-02-29 11:48:44 +0200390 def test_string_boundaries(self):
391 # See http://bugs.python.org/issue10713
392 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
393 "abc")
394 # There's a word boundary at the start of a string.
395 self.assertTrue(re.match(r"\b", "abc"))
396 # A non-empty string includes a non-boundary zero-length match.
397 self.assertTrue(re.search(r"\B", "abc"))
398 # There is no non-boundary match at the start of a string.
399 self.assertFalse(re.match(r"\B", "abc"))
400 # However, an empty string contains no word boundaries, and also no
401 # non-boundaries.
402 self.assertEqual(re.search(r"\B", ""), None)
403 # This one is questionable and different from the perlre behaviour,
404 # but describes current behavior.
405 self.assertEqual(re.search(r"\b", ""), None)
406 # A single word-character string has two boundaries, but no
407 # non-boundary gaps.
408 self.assertEqual(len(re.findall(r"\b", "a")), 2)
409 self.assertEqual(len(re.findall(r"\B", "a")), 0)
410 # If there are no words, there are no boundaries
411 self.assertEqual(len(re.findall(r"\b", " ")), 0)
412 self.assertEqual(len(re.findall(r"\b", " ")), 0)
413 # Can match around the whitespace.
414 self.assertEqual(len(re.findall(r"\B", " ")), 2)
415
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000416 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000417 self.assertEqual(re.match("([\u2222\u2223])",
418 "\u2222").group(1), "\u2222")
419 self.assertEqual(re.match("([\u2222\u2223])",
420 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000421
Antoine Pitrou39bdad82012-11-20 22:30:42 +0100422 def test_big_codesize(self):
423 # Issue #1160
424 r = re.compile('|'.join(('%d'%x for x in range(10000))))
425 self.assertIsNotNone(r.match('1000'))
426 self.assertIsNotNone(r.match('9999'))
427
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000428 def test_anyall(self):
429 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
430 "a\nb")
431 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
432 "a\n\nb")
433
434 def test_non_consuming(self):
435 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
436 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
437 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
438 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
439 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
440 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
441 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
442
443 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
444 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
445 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
446 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
447
448 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000449 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
450 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000451 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
452 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
453 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
454 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
455 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
456 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
457 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
458 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
459
460 def test_category(self):
461 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
462
463 def test_getlower(self):
464 import _sre
465 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
466 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
467 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
468
469 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000470 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000471
472 def test_not_literal(self):
473 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
474 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
475
476 def test_search_coverage(self):
477 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
478 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
479
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the match has
    # the expected text and span.
    #
    # match and span must be given together.  When both are omitted the
    # pattern is expected to match the whole of text.  Giving only one of
    # them raises ValueError.
    if match is None and span is None:
        # the pattern matches the whole text
        match = text
        span = (0, len(text))
    elif match is None or span is None:
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    m = matcher(pattern, text)
    self.assertTrue(m)
    self.assertEqual(m.group(), match)
    self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000493
def test_re_escape(self):
    # re.escape() on every character in range(256): ASCII letters, digits
    # and '_' are expected unchanged, NUL as '\000', and everything else
    # with a backslash prefix.  Each escaped form must match its source.
    alnum_chars = string.ascii_letters + string.digits + '_'
    p = ''.join(chr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            self.assertEqual(re.escape(c), c)
        elif c == '\x00':
            self.assertEqual(re.escape(c), '\\000')
        else:
            self.assertEqual(re.escape(c), '\\' + c)
        self.assertMatch(re.escape(c), c)
    self.assertMatch(re.escape(p), p)
Guido van Rossum49946571997-07-18 04:26:25 +0000506
def test_re_escape_byte(self):
    # Bytes counterpart of test_re_escape: the same expectations for every
    # single-byte value.
    alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
    p = bytes(range(256))
    for i in p:
        # Iterating over bytes yields ints; rebuild a one-byte object.
        b = bytes([i])
        if b in alnum_chars:
            self.assertEqual(re.escape(b), b)
        elif i == 0:
            self.assertEqual(re.escape(b), b'\\000')
        else:
            self.assertEqual(re.escape(b), b'\\' + b)
        self.assertMatch(re.escape(b), b)
    self.assertMatch(re.escape(p), p)
Guido van Rossum698280d2008-09-10 17:44:35 +0000520
def test_re_escape_non_ascii(self):
    # re.escape() on a str containing non-ASCII characters: each of them
    # is expected to get a backslash prefix, and the result must still
    # match the original text.
    s = 'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    self.assertMatch('.%s+.' % re.escape('\u2620'), s,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)
528
def test_re_escape_non_ascii_bytes(self):
    # re.escape() on UTF-8 encoded bytes: every non-ASCII byte is expected
    # to get a backslash prefix.
    b = 'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    # The escaped encoded character must be found at both occurrences.
    res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
    self.assertEqual(len(res), 2)
Guido van Rossum698280d2008-09-10 17:44:35 +0000536
def pickle_test(self, pickle):
    # Helper: round-trip a compiled pattern through the given pickle-like
    # module (anything with dumps()/loads()) and assert that the result
    # compares equal to the original pattern.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    s = pickle.dumps(oldpat)
    newpat = pickle.loads(s)
    self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000542
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000543 def test_constants(self):
544 self.assertEqual(re.I, re.IGNORECASE)
545 self.assertEqual(re.L, re.LOCALE)
546 self.assertEqual(re.M, re.MULTILINE)
547 self.assertEqual(re.S, re.DOTALL)
548 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000549
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000550 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000551 for flag in [re.I, re.M, re.X, re.S, re.L]:
552 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000553
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000554 def test_sre_character_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200555 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
556 if i < 256:
557 self.assertIsNotNone(re.match(r"\%03o" % i, chr(i)))
558 self.assertIsNotNone(re.match(r"\%03o0" % i, chr(i)+"0"))
559 self.assertIsNotNone(re.match(r"\%03o8" % i, chr(i)+"8"))
560 self.assertIsNotNone(re.match(r"\x%02x" % i, chr(i)))
561 self.assertIsNotNone(re.match(r"\x%02x0" % i, chr(i)+"0"))
562 self.assertIsNotNone(re.match(r"\x%02xz" % i, chr(i)+"z"))
563 if i < 0x10000:
564 self.assertIsNotNone(re.match(r"\u%04x" % i, chr(i)))
565 self.assertIsNotNone(re.match(r"\u%04x0" % i, chr(i)+"0"))
566 self.assertIsNotNone(re.match(r"\u%04xz" % i, chr(i)+"z"))
567 self.assertIsNotNone(re.match(r"\U%08x" % i, chr(i)))
568 self.assertIsNotNone(re.match(r"\U%08x0" % i, chr(i)+"0"))
569 self.assertIsNotNone(re.match(r"\U%08xz" % i, chr(i)+"z"))
570 self.assertIsNotNone(re.match(r"\0", "\000"))
571 self.assertIsNotNone(re.match(r"\08", "\0008"))
572 self.assertIsNotNone(re.match(r"\01", "\001"))
573 self.assertIsNotNone(re.match(r"\018", "\0018"))
574 self.assertIsNotNone(re.match(r"\567", chr(0o167)))
575 self.assertRaises(re.error, re.match, r"\911", "")
576 self.assertRaises(re.error, re.match, r"\x1", "")
577 self.assertRaises(re.error, re.match, r"\x1z", "")
578 self.assertRaises(re.error, re.match, r"\u123", "")
579 self.assertRaises(re.error, re.match, r"\u123z", "")
580 self.assertRaises(re.error, re.match, r"\U0001234", "")
581 self.assertRaises(re.error, re.match, r"\U0001234z", "")
582 self.assertRaises(re.error, re.match, r"\U00110000", "")
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000583
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000584 def test_sre_character_class_literals(self):
Antoine Pitrou463badf2012-06-23 13:29:19 +0200585 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
586 if i < 256:
587 self.assertIsNotNone(re.match(r"[\%o]" % i, chr(i)))
588 self.assertIsNotNone(re.match(r"[\%o8]" % i, chr(i)))
589 self.assertIsNotNone(re.match(r"[\%03o]" % i, chr(i)))
590 self.assertIsNotNone(re.match(r"[\%03o0]" % i, chr(i)))
591 self.assertIsNotNone(re.match(r"[\%03o8]" % i, chr(i)))
592 self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
593 self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
594 self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
595 if i < 0x10000:
596 self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
597 self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
598 self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
599 self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
600 self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
601 self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
602 self.assertRaises(re.error, re.match, r"[\911]", "")
603 self.assertRaises(re.error, re.match, r"[\x1z]", "")
604 self.assertRaises(re.error, re.match, r"[\u123z]", "")
605 self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
606 self.assertRaises(re.error, re.match, r"[\U00110000]", "")
607
608 def test_sre_byte_literals(self):
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000609 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Antoine Pitrou463badf2012-06-23 13:29:19 +0200610 self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
611 self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
612 self.assertIsNotNone(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
613 self.assertIsNotNone(re.match((r"\x%02x" % i).encode(), bytes([i])))
614 self.assertIsNotNone(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
615 self.assertIsNotNone(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
616 self.assertIsNotNone(re.match(br"\u", b'u'))
617 self.assertIsNotNone(re.match(br"\U", b'U'))
618 self.assertIsNotNone(re.match(br"\0", b"\000"))
619 self.assertIsNotNone(re.match(br"\08", b"\0008"))
620 self.assertIsNotNone(re.match(br"\01", b"\001"))
621 self.assertIsNotNone(re.match(br"\018", b"\0018"))
622 self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
623 self.assertRaises(re.error, re.match, br"\911", b"")
624 self.assertRaises(re.error, re.match, br"\x1", b"")
625 self.assertRaises(re.error, re.match, br"\x1z", b"")
626
627 def test_sre_byte_class_literals(self):
628 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
629 self.assertIsNotNone(re.match((r"[\%o]" % i).encode(), bytes([i])))
630 self.assertIsNotNone(re.match((r"[\%o8]" % i).encode(), bytes([i])))
631 self.assertIsNotNone(re.match((r"[\%03o]" % i).encode(), bytes([i])))
632 self.assertIsNotNone(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
633 self.assertIsNotNone(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
634 self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
635 self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
636 self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
637 self.assertIsNotNone(re.match(br"[\u]", b'u'))
638 self.assertIsNotNone(re.match(br"[\U]", b'U'))
639 self.assertRaises(re.error, re.match, br"[\911]", "")
640 self.assertRaises(re.error, re.match, br"[\x1z]", "")
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000641
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000642 def test_bug_113254(self):
643 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
644 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
645 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
646
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000647 def test_bug_527371(self):
648 # bug described in patches 527371/672491
649 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
650 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
651 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
652 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
653 self.assertEqual(re.match("((a))", "a").lastindex, 1)
654
655 def test_bug_545855(self):
656 # bug 545855 -- This pattern failed to cause a compile error as it
657 # should, instead provoking a TypeError.
658 self.assertRaises(re.error, re.compile, 'foo[a-')
659
660 def test_bug_418626(self):
661 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
662 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
663 # pattern '*?' on a long string.
664 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
665 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
666 20003)
667 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000668 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000669 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000670 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000671
672 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000673 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000674 self.assertEqual(re.compile(pat) and 1, 1)
675
Skip Montanaro1e703c62003-04-25 15:40:28 +0000676 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000677 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000678 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000679 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
680 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
681 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000682
683 def test_scanner(self):
684 def s_ident(scanner, token): return token
685 def s_operator(scanner, token): return "op%s" % token
686 def s_float(scanner, token): return float(token)
687 def s_int(scanner, token): return int(token)
688
689 scanner = Scanner([
690 (r"[a-zA-Z_]\w*", s_ident),
691 (r"\d+\.\d*", s_float),
692 (r"\d+", s_int),
693 (r"=|\+|-|\*|/", s_operator),
694 (r"\s+", None),
695 ])
696
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000697 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
698
Skip Montanaro1e703c62003-04-25 15:40:28 +0000699 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
700 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
701 'op+', 'bar'], ''))
702
Skip Montanaro5ba00542003-04-25 16:00:14 +0000703 def test_bug_448951(self):
704 # bug 448951 (similar to 429357, but with single char match)
705 # (Also test greedy matches.)
706 for op in '','?','*':
707 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
708 (None, None))
709 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
710 ('a:', 'a'))
711
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000712 def test_bug_725106(self):
713 # capturing groups in alternatives in repeats
714 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
715 ('b', 'a'))
716 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
717 ('c', 'b'))
718 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
719 ('b', None))
720 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
721 ('b', None))
722 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
723 ('b', 'a'))
724 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
725 ('c', 'b'))
726 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
727 ('b', None))
728 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
729 ('b', None))
730
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000731 def test_bug_725149(self):
732 # mark_stack_base restoring before restoring marks
733 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
734 ('a', None))
735 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
736 ('a', None, None))
737
Just van Rossum12723ba2003-07-02 20:03:04 +0000738 def test_bug_764548(self):
739 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000740 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000741 pat = re.compile(my_unicode("abc"))
742 self.assertEqual(pat.match("xyz"), None)
743
Skip Montanaro5ba00542003-04-25 16:00:14 +0000744 def test_finditer(self):
745 iter = re.finditer(r":+", "a:b::c:::d")
746 self.assertEqual([item.group(0) for item in iter],
747 [":", "::", ":::"])
748
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600749 pat = re.compile(r":+")
750 iter = pat.finditer("a:b::c:::d", 1, 10)
751 self.assertEqual([item.group(0) for item in iter],
752 [":", "::", ":::"])
753
754 pat = re.compile(r":+")
755 iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
756 self.assertEqual([item.group(0) for item in iter],
757 [":", "::", ":::"])
758
759 pat = re.compile(r":+")
760 iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
761 self.assertEqual([item.group(0) for item in iter],
762 [":", "::", ":::"])
763
764 pat = re.compile(r":+")
765 iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
766 self.assertEqual([item.group(0) for item in iter],
767 ["::", "::"])
768
Thomas Wouters40a088d2008-03-18 20:19:54 +0000769 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000770 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000771 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000772
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000773 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000774 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000775 self.assertEqual(re.compile(pattern).split("a.b.c"),
776 ['a','b','c'])
777
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000778 def test_bug_581080(self):
779 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000780 self.assertEqual(next(iter).span(), (1,2))
781 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000782
783 scanner = re.compile(r"\s").scanner("a b")
784 self.assertEqual(scanner.search().span(), (1, 2))
785 self.assertEqual(scanner.search(), None)
786
787 def test_bug_817234(self):
788 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000789 self.assertEqual(next(iter).span(), (0, 4))
790 self.assertEqual(next(iter).span(), (4, 4))
791 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000792
Mark Dickinson1f268282009-07-28 17:22:36 +0000793 def test_bug_6561(self):
794 # '\d' should match characters in Unicode category 'Nd'
795 # (Number, Decimal Digit), but not those in 'Nl' (Number,
796 # Letter) or 'No' (Number, Other).
797 decimal_digits = [
798 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
799 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
800 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
801 ]
802 for x in decimal_digits:
803 self.assertEqual(re.match('^\d$', x).group(0), x)
804
805 not_decimal_digits = [
806 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
807 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
808 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
809 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
810 ]
811 for x in not_decimal_digits:
812 self.assertIsNone(re.match('^\d$', x))
813
Guido van Rossumd8faa362007-04-27 19:54:29 +0000814 def test_empty_array(self):
815 # SF buf 1647541
816 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000817 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000818 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000819 self.assertEqual(re.compile(b"bla").match(a), None)
820 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000821
Christian Heimes072c0f12008-01-03 23:01:04 +0000822 def test_inline_flags(self):
823 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000824 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
825 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000826
827 p = re.compile(upper_char, re.I | re.U)
828 q = p.match(lower_char)
829 self.assertNotEqual(q, None)
830
831 p = re.compile(lower_char, re.I | re.U)
832 q = p.match(upper_char)
833 self.assertNotEqual(q, None)
834
835 p = re.compile('(?i)' + upper_char, re.U)
836 q = p.match(lower_char)
837 self.assertNotEqual(q, None)
838
839 p = re.compile('(?i)' + lower_char, re.U)
840 q = p.match(upper_char)
841 self.assertNotEqual(q, None)
842
843 p = re.compile('(?iu)' + upper_char)
844 q = p.match(lower_char)
845 self.assertNotEqual(q, None)
846
847 p = re.compile('(?iu)' + lower_char)
848 q = p.match(upper_char)
849 self.assertNotEqual(q, None)
850
Christian Heimes25bb7832008-01-11 16:17:00 +0000851 def test_dollar_matches_twice(self):
852 "$ matches the end of string, and just before the terminating \n"
853 pattern = re.compile('$')
854 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
855 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
856 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
857
858 pattern = re.compile('$', re.MULTILINE)
859 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
860 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
861 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
862
Antoine Pitroufd036452008-08-19 17:56:33 +0000863 def test_bytes_str_mixing(self):
864 # Mixing str and bytes is disallowed
865 pat = re.compile('.')
866 bpat = re.compile(b'.')
867 self.assertRaises(TypeError, pat.match, b'b')
868 self.assertRaises(TypeError, bpat.match, 'b')
869 self.assertRaises(TypeError, pat.sub, b'b', 'c')
870 self.assertRaises(TypeError, pat.sub, 'b', b'c')
871 self.assertRaises(TypeError, pat.sub, b'b', b'c')
872 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
873 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
874 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
875
876 def test_ascii_and_unicode_flag(self):
877 # String patterns
878 for flags in (0, re.UNICODE):
879 pat = re.compile('\xc0', flags | re.IGNORECASE)
880 self.assertNotEqual(pat.match('\xe0'), None)
881 pat = re.compile('\w', flags)
882 self.assertNotEqual(pat.match('\xe0'), None)
883 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
884 self.assertEqual(pat.match('\xe0'), None)
885 pat = re.compile('(?a)\xc0', re.IGNORECASE)
886 self.assertEqual(pat.match('\xe0'), None)
887 pat = re.compile('\w', re.ASCII)
888 self.assertEqual(pat.match('\xe0'), None)
889 pat = re.compile('(?a)\w')
890 self.assertEqual(pat.match('\xe0'), None)
891 # Bytes patterns
892 for flags in (0, re.ASCII):
893 pat = re.compile(b'\xc0', re.IGNORECASE)
894 self.assertEqual(pat.match(b'\xe0'), None)
895 pat = re.compile(b'\w')
896 self.assertEqual(pat.match(b'\xe0'), None)
897 # Incompatibilities
898 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
899 self.assertRaises(ValueError, re.compile, b'(?u)\w')
900 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
901 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
902 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
903 self.assertRaises(ValueError, re.compile, '(?au)\w')
904
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000905 def test_bug_6509(self):
906 # Replacement strings of both types must parse properly.
907 # all strings
908 pat = re.compile('a(\w)')
909 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
910 pat = re.compile('a(.)')
911 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
912 pat = re.compile('..')
913 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
914
915 # all bytes
916 pat = re.compile(b'a(\w)')
917 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
918 pat = re.compile(b'a(.)')
919 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
920 pat = re.compile(b'..')
921 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
922
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000923 def test_dealloc(self):
924 # issue 3299: check for segfault in debug build
925 import _sre
Ezio Melotti0f77f462010-01-23 10:49:39 +0000926 # the overflow limit is different on wide and narrow builds and it
927 # depends on the definition of SRE_CODE (see sre.h).
928 # 2**128 should be big enough to overflow on both. For smaller values
929 # a RuntimeError is raised instead of OverflowError.
930 long_overflow = 2**128
Antoine Pitrou82feb1f2010-01-14 17:34:48 +0000931 self.assertRaises(TypeError, re.finditer, "a", {})
932 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Victor Stinner5abeafb2010-03-04 21:59:53 +0000933 self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000934
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200935 def test_search_dot_unicode(self):
936 self.assertIsNotNone(re.search("123.*-", '123abc-'))
937 self.assertIsNotNone(re.search("123.*-", '123\xe9-'))
938 self.assertIsNotNone(re.search("123.*-", '123\u20ac-'))
939 self.assertIsNotNone(re.search("123.*-", '123\U0010ffff-'))
940 self.assertIsNotNone(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
941
Ezio Melottidf723e12012-03-13 01:29:48 +0200942 def test_compile(self):
943 # Test return value when given string and pattern as parameter
944 pattern = re.compile('random pattern')
945 self.assertIsInstance(pattern, re._pattern_type)
946 same_pattern = re.compile(pattern)
947 self.assertIsInstance(same_pattern, re._pattern_type)
948 self.assertIs(same_pattern, pattern)
949 # Test behaviour when not given a string or pattern as parameter
950 self.assertRaises(TypeError, re.compile, 0)
951
Ezio Melottife8e6e72013-01-11 08:32:01 +0200952 def test_bug_13899(self):
953 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
954 # nothing. Ditto B and Z.
955 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
956 ['A', 'B', '\b', 'C', 'Z'])
957
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    text = 'a' * size
    match = re.search('$', text)
    self.assertIsNotNone(match)
    # The end-of-string match must report offsets equal to the length.
    self.assertEqual(match.start(), size)
    self.assertEqual(match.end(), size)
Antoine Pitrou1f1888e2012-12-03 20:53:12 +0100966
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    text = 'a' * size
    replaced, count = re.subn('', '', text)
    self.assertEqual(replaced, text)
    # One empty match is expected before each character and one at the end.
    self.assertEqual(count, size + 1)
976
Serhiy Storchakac1b59d42012-12-29 23:38:48 +0200977 def test_bug_16688(self):
978 # Issue 16688: Backreferences make case-insensitive regex fail on
979 # non-ASCII strings.
980 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
981 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100982
def _group_or_marker(match, key):
    # Return group *key* of *match*, with "None" standing in for a group
    # that did not take part and "Error" for a group that does not exist,
    # so the table's expressions can concatenate the value as a string.
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    return "None" if value is None else value


def run_re_tests():
    """Run the table-driven cases from ``test.re_tests`` and print mismatches.

    Each table entry is ``(pattern, string, outcome)`` or
    ``(pattern, string, outcome, repl, expected)``.  Mismatches between the
    observed and the expected outcome are reported with ``print()``.

    Raises:
        ValueError: if a table entry has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            # A syntax error is only a problem when it was not expected.
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception subclasses
            # and therefore propagate with their original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # Without a search result there is nothing further to check;
            # carrying on would read a stale or unbound ``result``.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_or_marker(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_or_marker(result, name)
            # NOTE: eval() runs an expression taken from the test table;
            # the table must only ever contain trusted data.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                bytes_result = bpat.search(bs)
                if bytes_result is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001109
Gregory P. Smith5a631832010-07-27 05:31:29 +00001110
def test_main():
    """Run the ReTests unittest suite, then the table-driven re_tests cases."""
    run_unittest(ReTests)
    run_re_tests()


if __name__ == "__main__":
    test_main()