blob: 1ede13372f9863045863f2e1fd68b813bd8e2fa1 [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Guido van Rossumaf554a02007-08-16 23:48:43 +00004from test.test_support import verbose, run_unittest, catch_warning
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00006from re import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
Skip Montanaro8ed06da2003-04-24 19:43:18 +000038 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000039 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Thomas Wouters1b7f8912007-09-19 03:06:30 +000086# This test makes no sense until re supports bytes, and should then probably
87# test for the *in*ability to mix bytes and str this way :)
88#
89# def test_bug_1140(self):
90# # re.sub(x, y, b'') should return b'', not '', and
91# # re.sub(x, y, '') should return '', not b''.
92# # Also:
93# # re.sub(x, y, str(x)) should return str(y), and
94# # re.sub(x, y, bytes(x)) should return
95# # str(y) if isinstance(y, str) else unicode(y).
96# for x in 'x', u'x':
97# for y in 'y', u'y':
98# z = re.sub(x, y, u'')
99# self.assertEqual(z, u'')
100# self.assertEqual(type(z), unicode)
101# #
102# z = re.sub(x, y, '')
103# self.assertEqual(z, '')
104# self.assertEqual(type(z), str)
105# #
106# z = re.sub(x, y, unicode(x))
107# self.assertEqual(z, y)
108# self.assertEqual(type(z), unicode)
109# #
110# z = re.sub(x, y, str(x))
111# self.assertEqual(z, y)
112# self.assertEqual(type(z), type(y))
113
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000114 def test_sub_template_numeric_escape(self):
115 # bug 776311 and friends
116 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
117 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
118 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
119 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
120 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
121 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
122 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
123
124 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
125 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
126
127 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
128 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
129 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
130 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
131 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
132
133 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
134 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000135
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000136 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
137 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
138 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
139 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
140 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
141 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
142 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
143 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
144 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
145 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
147 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
148
149 # in python2.3 (etc), these loop endlessly in sre_parser.py
150 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
151 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
152 'xz8')
153 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
154 'xza')
155
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000156 def test_qualified_re_sub(self):
157 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
158 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000159
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000160 def test_bug_114660(self):
161 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
162 'hello there')
163
164 def test_bug_462270(self):
165 # Test for empty sub() behaviour, see SF bug #462270
166 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
167 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
168
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000169 def test_symbolic_refs(self):
170 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
171 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
172 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
173 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
174 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
175 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
176 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
177 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000178 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000179
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000180 def test_re_subn(self):
181 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
182 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
183 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
184 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
185 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000186
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000187 def test_re_split(self):
188 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
189 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
190 self.assertEqual(re.split("(:*)", ":a:b::c"),
191 ['', ':', 'a', ':', 'b', '::', 'c'])
192 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
193 self.assertEqual(re.split("(:)*", ":a:b::c"),
194 ['', ':', 'a', ':', 'b', ':', 'c'])
195 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
196 ['', ':', 'a', ':b::', 'c'])
197 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
198 ['', None, ':', 'a', None, ':', '', 'b', None, '',
199 None, '::', 'c'])
200 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
201 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000202
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000203 def test_qualified_re_split(self):
204 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
205 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
206 self.assertEqual(re.split("(:)", ":a:b::c", 2),
207 ['', ':', 'a', ':', 'b::c'])
208 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
209 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000210
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000211 def test_re_findall(self):
212 self.assertEqual(re.findall(":+", "abc"), [])
213 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
214 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
215 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
216 (":", ":"),
217 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000218
Skip Montanaro5ba00542003-04-25 16:00:14 +0000219 def test_bug_117612(self):
220 self.assertEqual(re.findall(r"(a|(b))", "aba"),
221 [("a", ""),("b", "b"),("a", "")])
222
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000223 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000224 self.assertEqual(re.match('a', 'a').groups(), ())
225 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
226 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
227 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
228 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000229
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000230 pat = re.compile('((a)|(b))(c)?')
231 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
232 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
233 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
234 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
235 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000236
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000237 # A single group
238 m = re.match('(a)', 'a')
239 self.assertEqual(m.group(0), 'a')
240 self.assertEqual(m.group(0), 'a')
241 self.assertEqual(m.group(1), 'a')
242 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000243
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000244 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
245 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
246 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
247 (None, 'b', None))
248 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000249
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000250 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000251 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
252 ('(', 'a'))
253 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
254 (None, 'a'))
255 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
256 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
257 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
258 ('a', 'b'))
259 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
260 (None, 'd'))
261 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
262 (None, 'd'))
263 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
264 ('a', ''))
265
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000266 # Tests for bug #1177831: exercise groups other than the first group
267 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
268 self.assertEqual(p.match('abc').groups(),
269 ('a', 'b', 'c'))
270 self.assertEqual(p.match('ad').groups(),
271 ('a', None, 'd'))
272 self.assertEqual(p.match('abd'), None)
273 self.assertEqual(p.match('ac'), None)
274
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000275
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000276 def test_re_groupref(self):
277 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
278 ('|', 'a'))
279 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
280 (None, 'a'))
281 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
282 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
283 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
284 ('a', 'a'))
285 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
286 (None, None))
287
288 def test_groupdict(self):
289 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
290 'first second').groupdict(),
291 {'first':'first', 'second':'second'})
292
293 def test_expand(self):
294 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
295 "first second")
296 .expand(r"\2 \1 \g<second> \g<first>"),
297 "second first second first")
298
299 def test_repeat_minmax(self):
300 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
301 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
302 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
303 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
304
305 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
306 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
307 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
308 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
309 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
310 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
311 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
312 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
313
314 self.assertEqual(re.match("^x{1}$", "xxx"), None)
315 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
316 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
317 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
318
319 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
320 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
321 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
322 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
323 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
324 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
325 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
326 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
327
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000328 self.assertEqual(re.match("^x{}$", "xxx"), None)
329 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
330
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000331 def test_getattr(self):
332 self.assertEqual(re.match("(a)", "a").pos, 0)
333 self.assertEqual(re.match("(a)", "a").endpos, 1)
334 self.assertEqual(re.match("(a)", "a").string, "a")
335 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
336 self.assertNotEqual(re.match("(a)", "a").re, None)
337
338 def test_special_escapes(self):
339 self.assertEqual(re.search(r"\b(b.)\b",
340 "abcd abc bcd bx").group(1), "bx")
341 self.assertEqual(re.search(r"\B(b.)\B",
342 "abc bcd bc abxd").group(1), "bx")
343 self.assertEqual(re.search(r"\b(b.)\b",
344 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
345 self.assertEqual(re.search(r"\B(b.)\B",
346 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
347 self.assertEqual(re.search(r"\b(b.)\b",
348 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
349 self.assertEqual(re.search(r"\B(b.)\B",
350 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
351 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
352 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
353 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
354 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000355 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000356 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000357 "abc bcd bc abxd").group(1), "bx")
358 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
359 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
360 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000361 self.assertEqual(re.search(r"\d\D\w\W\s\S",
362 "1aa! a").group(0), "1aa! a")
363 self.assertEqual(re.search(r"\d\D\w\W\s\S",
364 "1aa! a", re.LOCALE).group(0), "1aa! a")
365 self.assertEqual(re.search(r"\d\D\w\W\s\S",
366 "1aa! a", re.UNICODE).group(0), "1aa! a")
367
368 def test_ignore_case(self):
369 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000370 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000371
372 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000373 self.assertEqual(re.match("([\u2222\u2223])",
374 "\u2222").group(1), "\u2222")
375 self.assertEqual(re.match("([\u2222\u2223])",
376 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000377
378 def test_anyall(self):
379 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
380 "a\nb")
381 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
382 "a\n\nb")
383
384 def test_non_consuming(self):
385 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
386 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
387 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
388 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
389 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
390 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
391 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
392
393 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
394 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
395 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
396 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
397
398 def test_ignore_case(self):
399 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
400 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
401 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
402 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
403 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
404 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
405 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
406 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
407
408 def test_category(self):
409 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
410
411 def test_getlower(self):
412 import _sre
413 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
414 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
415 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
416
417 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000418 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000419
420 def test_not_literal(self):
421 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
422 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
423
424 def test_search_coverage(self):
425 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
426 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
427
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000428 def test_re_escape(self):
429 p=""
430 for i in range(0, 256):
431 p = p + chr(i)
432 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
433 True)
434 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000435
Skip Montanaro1e703c62003-04-25 15:40:28 +0000436 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000437 self.assertEqual(pat.match(p) is not None, True)
438 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000439
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000440 def test_pickling(self):
441 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000442 self.pickle_test(pickle)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000443 # old pickles expect the _compile() reconstructor in sre module
444 import warnings
Guido van Rossumaf554a02007-08-16 23:48:43 +0000445 with catch_warning():
Guido van Rossumd8faa362007-04-27 19:54:29 +0000446 warnings.filterwarnings("ignore", "The sre module is deprecated",
447 DeprecationWarning)
448 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000449
450 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000451 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
452 s = pickle.dumps(oldpat)
453 newpat = pickle.loads(s)
454 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000455
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000456 def test_constants(self):
457 self.assertEqual(re.I, re.IGNORECASE)
458 self.assertEqual(re.L, re.LOCALE)
459 self.assertEqual(re.M, re.MULTILINE)
460 self.assertEqual(re.S, re.DOTALL)
461 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000462
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000463 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000464 for flag in [re.I, re.M, re.X, re.S, re.L]:
465 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000466
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000467 def test_sre_character_literals(self):
468 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
469 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
470 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
471 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
472 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
473 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
474 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
475 self.assertRaises(re.error, re.match, "\911", "")
476
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000477 def test_sre_character_class_literals(self):
478 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
479 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
480 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
481 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
482 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
483 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
484 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
485 self.assertRaises(re.error, re.match, "[\911]", "")
486
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000487 def test_bug_113254(self):
488 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
489 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
490 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
491
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000492 def test_bug_527371(self):
493 # bug described in patches 527371/672491
494 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
495 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
496 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
497 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
498 self.assertEqual(re.match("((a))", "a").lastindex, 1)
499
500 def test_bug_545855(self):
501 # bug 545855 -- This pattern failed to cause a compile error as it
502 # should, instead provoking a TypeError.
503 self.assertRaises(re.error, re.compile, 'foo[a-')
504
505 def test_bug_418626(self):
506 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
507 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
508 # pattern '*?' on a long string.
509 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
510 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
511 20003)
512 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000513 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000514 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000515 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000516
517 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000518 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000519 self.assertEqual(re.compile(pat) and 1, 1)
520
Skip Montanaro1e703c62003-04-25 15:40:28 +0000521 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000522 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000523 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000524 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
525 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
526 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000527
528 def test_scanner(self):
529 def s_ident(scanner, token): return token
530 def s_operator(scanner, token): return "op%s" % token
531 def s_float(scanner, token): return float(token)
532 def s_int(scanner, token): return int(token)
533
534 scanner = Scanner([
535 (r"[a-zA-Z_]\w*", s_ident),
536 (r"\d+\.\d*", s_float),
537 (r"\d+", s_int),
538 (r"=|\+|-|\*|/", s_operator),
539 (r"\s+", None),
540 ])
541
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000542 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
543
Skip Montanaro1e703c62003-04-25 15:40:28 +0000544 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
545 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
546 'op+', 'bar'], ''))
547
Skip Montanaro5ba00542003-04-25 16:00:14 +0000548 def test_bug_448951(self):
549 # bug 448951 (similar to 429357, but with single char match)
550 # (Also test greedy matches.)
551 for op in '','?','*':
552 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
553 (None, None))
554 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
555 ('a:', 'a'))
556
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000557 def test_bug_725106(self):
558 # capturing groups in alternatives in repeats
559 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
560 ('b', 'a'))
561 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
562 ('c', 'b'))
563 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
564 ('b', None))
565 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
566 ('b', None))
567 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
568 ('b', 'a'))
569 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
570 ('c', 'b'))
571 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
572 ('b', None))
573 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
574 ('b', None))
575
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000576 def test_bug_725149(self):
577 # mark_stack_base restoring before restoring marks
578 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
579 ('a', None))
580 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
581 ('a', None, None))
582
Just van Rossum12723ba2003-07-02 20:03:04 +0000583 def test_bug_764548(self):
584 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000585 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000586 pat = re.compile(my_unicode("abc"))
587 self.assertEqual(pat.match("xyz"), None)
588
Skip Montanaro5ba00542003-04-25 16:00:14 +0000589 def test_finditer(self):
590 iter = re.finditer(r":+", "a:b::c:::d")
591 self.assertEqual([item.group(0) for item in iter],
592 [":", "::", ":::"])
593
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000594 def test_bug_926075(self):
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000595 self.assert_(re.compile('bug_926075') is not
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000596 re.compile(str8('bug_926075')))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000597
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000598 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000599 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000600 self.assertEqual(re.compile(pattern).split("a.b.c"),
601 ['a','b','c'])
602
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000603 def test_bug_581080(self):
604 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000605 self.assertEqual(next(iter).span(), (1,2))
606 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000607
608 scanner = re.compile(r"\s").scanner("a b")
609 self.assertEqual(scanner.search().span(), (1, 2))
610 self.assertEqual(scanner.search(), None)
611
612 def test_bug_817234(self):
613 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000614 self.assertEqual(next(iter).span(), (0, 4))
615 self.assertEqual(next(iter).span(), (4, 4))
616 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000617
Guido van Rossumd8faa362007-04-27 19:54:29 +0000618 def test_empty_array(self):
619 # SF buf 1647541
620 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000621 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000622 a = array.array(typecode)
623 self.assertEqual(re.compile("bla").match(a), None)
624 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000625
def _group_or_marker(match, key):
    # Return match.group(key) in the form the repl expressions expect:
    # the string "None" for a group that did not take part in the match
    # (so string concatenation in the expression works) and the string
    # "Error" for a group the pattern does not define.
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    return "None" if value is None else value


def run_re_tests():
    # Run the table-driven cases from test.re_tests.  Each case is a
    # 3-tuple (pattern, string, outcome) or a 5-tuple that additionally
    # carries a result expression and its expected value.  Every
    # deviation is reported as a '=== ...' line on stdout; a failing
    # case does not raise.  Only a malformed tuple raises ValueError.
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            # A syntax error is only worth reporting when unexpected.
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt is not an Exception subclass, so it
            # propagates on its own with its original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no match result for this case; carrying on would
            # inspect an unbound name or the previous case's result.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_or_marker(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_or_marker(result, name)
            # repl is an expression taken from the test.re_tests table,
            # evaluated with the names collected above.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match on a unicode string, and check that it
        # still succeeds.
        # NOTE(review): str(s, "latin-1") raises TypeError whenever s is
        # already a str, in which case every check below is skipped for
        # this case -- confirm that this is still the intent.
        try:
            result = obj.search(str(s, "latin-1"))
            if result is None:
                print('=== Fails on unicode match', t)
        except NameError:
            continue # 1.5.2
        except TypeError:
            continue # unicode test case

        # Try the match on a unicode pattern, and check that it
        # still succeeds.
        obj = re.compile(str(pattern, "latin-1"))
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.LOCALE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000749
def test_main():
    # Run the unittest-based ReTests class first, then the table-driven
    # checks performed by run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000753
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()