blob: 70bd88623d6000918960456163f4a3619396f00a [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Brett Cannon672237d2008-09-09 00:49:16 +00004from test.test_support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00006from re import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
Just van Rossum6802c6e2003-07-02 14:36:59 +000012# WARNING: Don't change details in these tests if you don't know
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000013# what you're doing. Some of these tests were carefully modeled to
14# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
Skip Montanaro8ed06da2003-04-24 19:43:18 +000038 def bump_num(self, matchobj):
Guido van Rossum41360a41998-03-26 19:42:58 +000039 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Guido van Rossum1ff91d92007-09-10 22:02:25 +000086 def test_bug_1140(self):
87 # re.sub(x, y, u'') should return u'', not '', and
88 # re.sub(x, y, '') should return '', not u''.
89 # Also:
90 # re.sub(x, y, unicode(x)) should return unicode(y), and
91 # re.sub(x, y, str(x)) should return
92 # str(y) if isinstance(y, str) else unicode(y).
93 for x in 'x', u'x':
94 for y in 'y', u'y':
95 z = re.sub(x, y, u'')
96 self.assertEqual(z, u'')
97 self.assertEqual(type(z), unicode)
98 #
99 z = re.sub(x, y, '')
100 self.assertEqual(z, '')
101 self.assertEqual(type(z), str)
102 #
103 z = re.sub(x, y, unicode(x))
104 self.assertEqual(z, y)
105 self.assertEqual(type(z), unicode)
106 #
107 z = re.sub(x, y, str(x))
108 self.assertEqual(z, y)
109 self.assertEqual(type(z), type(y))
110
Raymond Hettinger80016c92007-12-19 18:13:31 +0000111 def test_bug_1661(self):
112 # Verify that flags do not get silently ignored with compiled patterns
113 pattern = re.compile('.')
114 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
115 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.compile, pattern, re.I)
118
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000119 def test_sub_template_numeric_escape(self):
120 # bug 776311 and friends
121 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
122 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
123 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
124 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
125 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
126 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
127 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
128
129 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
130 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
131
132 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
133 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
134 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
135 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
136 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
137
138 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
139 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000140
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000141 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
142 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
143 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
144 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
145 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
150 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
152 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
153
154 # in python2.3 (etc), these loop endlessly in sre_parser.py
155 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
156 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
157 'xz8')
158 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
159 'xza')
160
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000161 def test_qualified_re_sub(self):
162 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
163 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000164
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000165 def test_bug_114660(self):
166 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
167 'hello there')
168
169 def test_bug_462270(self):
170 # Test for empty sub() behaviour, see SF bug #462270
171 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
172 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
173
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000174 def test_symbolic_refs(self):
175 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
176 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
177 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
178 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
179 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
180 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
181 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
182 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000183 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000184
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000185 def test_re_subn(self):
186 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
187 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
188 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
189 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
190 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000191
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000192 def test_re_split(self):
193 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
194 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
195 self.assertEqual(re.split("(:*)", ":a:b::c"),
196 ['', ':', 'a', ':', 'b', '::', 'c'])
197 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
198 self.assertEqual(re.split("(:)*", ":a:b::c"),
199 ['', ':', 'a', ':', 'b', ':', 'c'])
200 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
201 ['', ':', 'a', ':b::', 'c'])
202 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
203 ['', None, ':', 'a', None, ':', '', 'b', None, '',
204 None, '::', 'c'])
205 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
206 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000207
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000208 def test_qualified_re_split(self):
209 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
210 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
211 self.assertEqual(re.split("(:)", ":a:b::c", 2),
212 ['', ':', 'a', ':', 'b::c'])
213 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
214 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000215
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000216 def test_re_findall(self):
217 self.assertEqual(re.findall(":+", "abc"), [])
218 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
219 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
220 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
221 (":", ":"),
222 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000223
Skip Montanaro5ba00542003-04-25 16:00:14 +0000224 def test_bug_117612(self):
225 self.assertEqual(re.findall(r"(a|(b))", "aba"),
226 [("a", ""),("b", "b"),("a", "")])
227
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000228 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000229 self.assertEqual(re.match('a', 'a').groups(), ())
230 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
231 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
232 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
233 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000234
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000235 pat = re.compile('((a)|(b))(c)?')
236 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
237 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
238 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
239 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
240 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000241
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000242 # A single group
243 m = re.match('(a)', 'a')
244 self.assertEqual(m.group(0), 'a')
245 self.assertEqual(m.group(0), 'a')
246 self.assertEqual(m.group(1), 'a')
247 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000248
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000249 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
250 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
251 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
252 (None, 'b', None))
253 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000254
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000255 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000256 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
257 ('(', 'a'))
258 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
259 (None, 'a'))
260 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
261 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
262 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
263 ('a', 'b'))
264 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
265 (None, 'd'))
266 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
267 (None, 'd'))
268 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
269 ('a', ''))
270
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000271 # Tests for bug #1177831: exercise groups other than the first group
272 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
273 self.assertEqual(p.match('abc').groups(),
274 ('a', 'b', 'c'))
275 self.assertEqual(p.match('ad').groups(),
276 ('a', None, 'd'))
277 self.assertEqual(p.match('abd'), None)
278 self.assertEqual(p.match('ac'), None)
279
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000280
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000281 def test_re_groupref(self):
282 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
283 ('|', 'a'))
284 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
285 (None, 'a'))
286 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
287 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
288 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
289 ('a', 'a'))
290 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
291 (None, None))
292
293 def test_groupdict(self):
294 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
295 'first second').groupdict(),
296 {'first':'first', 'second':'second'})
297
298 def test_expand(self):
299 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
300 "first second")
301 .expand(r"\2 \1 \g<second> \g<first>"),
302 "second first second first")
303
304 def test_repeat_minmax(self):
305 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
306 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
307 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
308 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
309
310 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
311 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
312 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
313 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
314 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
315 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
316 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
317 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
318
319 self.assertEqual(re.match("^x{1}$", "xxx"), None)
320 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
321 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
322 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
323
324 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
325 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
326 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
327 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
328 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
329 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
330 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
331 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
332
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000333 self.assertEqual(re.match("^x{}$", "xxx"), None)
334 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
335
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000336 def test_getattr(self):
337 self.assertEqual(re.match("(a)", "a").pos, 0)
338 self.assertEqual(re.match("(a)", "a").endpos, 1)
339 self.assertEqual(re.match("(a)", "a").string, "a")
340 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
341 self.assertNotEqual(re.match("(a)", "a").re, None)
342
343 def test_special_escapes(self):
344 self.assertEqual(re.search(r"\b(b.)\b",
345 "abcd abc bcd bx").group(1), "bx")
346 self.assertEqual(re.search(r"\B(b.)\B",
347 "abc bcd bc abxd").group(1), "bx")
348 self.assertEqual(re.search(r"\b(b.)\b",
349 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
350 self.assertEqual(re.search(r"\B(b.)\B",
351 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
352 self.assertEqual(re.search(r"\b(b.)\b",
353 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
354 self.assertEqual(re.search(r"\B(b.)\B",
355 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
356 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
357 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
358 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
359 self.assertEqual(re.search(r"\b(b.)\b",
360 u"abcd abc bcd bx").group(1), "bx")
361 self.assertEqual(re.search(r"\B(b.)\B",
362 u"abc bcd bc abxd").group(1), "bx")
363 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
364 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
365 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
366 self.assertEqual(re.search(r"\d\D\w\W\s\S",
367 "1aa! a").group(0), "1aa! a")
368 self.assertEqual(re.search(r"\d\D\w\W\s\S",
369 "1aa! a", re.LOCALE).group(0), "1aa! a")
370 self.assertEqual(re.search(r"\d\D\w\W\s\S",
371 "1aa! a", re.UNICODE).group(0), "1aa! a")
372
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000373 def test_bigcharset(self):
374 self.assertEqual(re.match(u"([\u2222\u2223])",
375 u"\u2222").group(1), u"\u2222")
376 self.assertEqual(re.match(u"([\u2222\u2223])",
377 u"\u2222", re.UNICODE).group(1), u"\u2222")
378
379 def test_anyall(self):
380 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
381 "a\nb")
382 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
383 "a\n\nb")
384
385 def test_non_consuming(self):
386 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
387 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
388 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
389 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
390 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
391 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
392 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
393
394 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
395 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
396 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
397 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
398
399 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000400 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
401 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000402 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
403 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
404 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
405 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
406 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
407 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
408 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
409 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
410
411 def test_category(self):
412 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
413
414 def test_getlower(self):
415 import _sre
416 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
417 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
418 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
419
420 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
421 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
422
423 def test_not_literal(self):
424 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
425 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
426
427 def test_search_coverage(self):
428 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
429 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
430
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000431 def test_re_escape(self):
432 p=""
433 for i in range(0, 256):
434 p = p + chr(i)
435 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
436 True)
437 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000438
Skip Montanaro1e703c62003-04-25 15:40:28 +0000439 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000440 self.assertEqual(pat.match(p) is not None, True)
441 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000442
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000443 def test_pickling(self):
444 import pickle
Skip Montanaro1e703c62003-04-25 15:40:28 +0000445 self.pickle_test(pickle)
446 import cPickle
447 self.pickle_test(cPickle)
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000448 # old pickles expect the _compile() reconstructor in sre module
449 import warnings
Brett Cannon672237d2008-09-09 00:49:16 +0000450 with warnings.catch_warnings():
Žiga Seilnacht7492e422007-03-21 20:07:56 +0000451 warnings.filterwarnings("ignore", "The sre module is deprecated",
452 DeprecationWarning)
453 from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000454
455 def pickle_test(self, pickle):
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000456 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
457 s = pickle.dumps(oldpat)
458 newpat = pickle.loads(s)
459 self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000460
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000461 def test_constants(self):
462 self.assertEqual(re.I, re.IGNORECASE)
463 self.assertEqual(re.L, re.LOCALE)
464 self.assertEqual(re.M, re.MULTILINE)
465 self.assertEqual(re.S, re.DOTALL)
466 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000467
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000468 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000469 for flag in [re.I, re.M, re.X, re.S, re.L]:
470 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000471
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000472 def test_sre_character_literals(self):
473 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
474 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
475 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
476 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
477 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
478 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
479 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
480 self.assertRaises(re.error, re.match, "\911", "")
481
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000482 def test_sre_character_class_literals(self):
483 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
484 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
485 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
486 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
487 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
488 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
489 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
490 self.assertRaises(re.error, re.match, "[\911]", "")
491
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000492 def test_bug_113254(self):
493 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
494 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
495 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
496
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000497 def test_bug_527371(self):
498 # bug described in patches 527371/672491
499 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
500 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
501 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
502 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
503 self.assertEqual(re.match("((a))", "a").lastindex, 1)
504
505 def test_bug_545855(self):
506 # bug 545855 -- This pattern failed to cause a compile error as it
507 # should, instead provoking a TypeError.
508 self.assertRaises(re.error, re.compile, 'foo[a-')
509
510 def test_bug_418626(self):
511 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
512 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
513 # pattern '*?' on a long string.
514 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
515 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
516 20003)
517 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000518 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000519 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000520 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000521
522 def test_bug_612074(self):
523 pat=u"["+re.escape(u"\u2039")+u"]"
524 self.assertEqual(re.compile(pat) and 1, 1)
525
Skip Montanaro1e703c62003-04-25 15:40:28 +0000526 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000527 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000528 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000529 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
530 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
531 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000532
533 def test_scanner(self):
534 def s_ident(scanner, token): return token
535 def s_operator(scanner, token): return "op%s" % token
536 def s_float(scanner, token): return float(token)
537 def s_int(scanner, token): return int(token)
538
539 scanner = Scanner([
540 (r"[a-zA-Z_]\w*", s_ident),
541 (r"\d+\.\d*", s_float),
542 (r"\d+", s_int),
543 (r"=|\+|-|\*|/", s_operator),
544 (r"\s+", None),
545 ])
546
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000547 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
548
Skip Montanaro1e703c62003-04-25 15:40:28 +0000549 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
550 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
551 'op+', 'bar'], ''))
552
Skip Montanaro5ba00542003-04-25 16:00:14 +0000553 def test_bug_448951(self):
554 # bug 448951 (similar to 429357, but with single char match)
555 # (Also test greedy matches.)
556 for op in '','?','*':
557 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
558 (None, None))
559 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
560 ('a:', 'a'))
561
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000562 def test_bug_725106(self):
563 # capturing groups in alternatives in repeats
564 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
565 ('b', 'a'))
566 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
567 ('c', 'b'))
568 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
569 ('b', None))
570 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
571 ('b', None))
572 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
573 ('b', 'a'))
574 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
575 ('c', 'b'))
576 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
577 ('b', None))
578 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
579 ('b', None))
580
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000581 def test_bug_725149(self):
582 # mark_stack_base restoring before restoring marks
583 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
584 ('a', None))
585 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
586 ('a', None, None))
587
Just van Rossum12723ba2003-07-02 20:03:04 +0000588 def test_bug_764548(self):
589 # bug 764548, re.compile() barfs on str/unicode subclasses
590 try:
591 unicode
592 except NameError:
593 return # no problem if we have no unicode
594 class my_unicode(unicode): pass
595 pat = re.compile(my_unicode("abc"))
596 self.assertEqual(pat.match("xyz"), None)
597
Skip Montanaro5ba00542003-04-25 16:00:14 +0000598 def test_finditer(self):
599 iter = re.finditer(r":+", "a:b::c:::d")
600 self.assertEqual([item.group(0) for item in iter],
601 [":", "::", ":::"])
602
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000603 def test_bug_926075(self):
604 try:
605 unicode
606 except NameError:
607 return # no problem if we have no unicode
608 self.assert_(re.compile('bug_926075') is not
609 re.compile(eval("u'bug_926075'")))
610
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000611 def test_bug_931848(self):
612 try:
613 unicode
614 except NameError:
615 pass
616 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
617 self.assertEqual(re.compile(pattern).split("a.b.c"),
618 ['a','b','c'])
619
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000620 def test_bug_581080(self):
621 iter = re.finditer(r"\s", "a b")
622 self.assertEqual(iter.next().span(), (1,2))
623 self.assertRaises(StopIteration, iter.next)
624
625 scanner = re.compile(r"\s").scanner("a b")
626 self.assertEqual(scanner.search().span(), (1, 2))
627 self.assertEqual(scanner.search(), None)
628
629 def test_bug_817234(self):
630 iter = re.finditer(r".*", "asdf")
631 self.assertEqual(iter.next().span(), (0, 4))
632 self.assertEqual(iter.next().span(), (4, 4))
633 self.assertRaises(StopIteration, iter.next)
634
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000635 def test_empty_array(self):
636 # SF buf 1647541
637 import array
638 for typecode in 'cbBuhHiIlLfd':
639 a = array.array(typecode)
640 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000641 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000642
Guido van Rossumae04c332008-01-03 19:12:44 +0000643 def test_inline_flags(self):
644 # Bug #1700
645 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
646 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
647
648 p = re.compile(upper_char, re.I | re.U)
649 q = p.match(lower_char)
650 self.assertNotEqual(q, None)
651
652 p = re.compile(lower_char, re.I | re.U)
653 q = p.match(upper_char)
654 self.assertNotEqual(q, None)
655
656 p = re.compile('(?i)' + upper_char, re.U)
657 q = p.match(lower_char)
658 self.assertNotEqual(q, None)
659
660 p = re.compile('(?i)' + lower_char, re.U)
661 q = p.match(upper_char)
662 self.assertNotEqual(q, None)
663
664 p = re.compile('(?iu)' + upper_char)
665 q = p.match(lower_char)
666 self.assertNotEqual(q, None)
667
668 p = re.compile('(?iu)' + lower_char)
669 q = p.match(upper_char)
670 self.assertNotEqual(q, None)
671
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000672 def test_dollar_matches_twice(self):
673 "$ matches the end of string, and just before the terminating \n"
674 pattern = re.compile('$')
675 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
676 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
677 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
678
679 pattern = re.compile('$', re.MULTILINE)
680 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
681 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
682 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
683
Guido van Rossumae04c332008-01-03 19:12:44 +0000684
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000685def run_re_tests():
686 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
687 if verbose:
688 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000689 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000690 # To save time, only run the first and last 10 tests
691 #tests = tests[:10] + tests[-10:]
692 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000693
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000694 for t in tests:
695 sys.stdout.flush()
696 pattern = s = outcome = repl = expected = None
697 if len(t) == 5:
698 pattern, s, outcome, repl, expected = t
699 elif len(t) == 3:
700 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000701 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000702 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
703
Guido van Rossum41360a41998-03-26 19:42:58 +0000704 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000705 obj = re.compile(pattern)
706 except re.error:
707 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000708 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000709 print '=== Syntax error:', t
710 except KeyboardInterrupt: raise KeyboardInterrupt
711 except:
712 print '*** Unexpected error ***', t
713 if verbose:
714 traceback.print_exc(file=sys.stdout)
715 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000716 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000717 result = obj.search(s)
718 except re.error, msg:
719 print '=== Unexpected exception', t, repr(msg)
720 if outcome == SYNTAX_ERROR:
721 # This should have been a syntax error; forget it.
722 pass
723 elif outcome == FAIL:
724 if result is None: pass # No match, as expected
725 else: print '=== Succeeded incorrectly', t
726 elif outcome == SUCCEED:
727 if result is not None:
728 # Matched, as expected, so now we compute the
729 # result string and compare it to our expected result.
730 start, end = result.span(0)
731 vardict={'found': result.group(0),
732 'groups': result.group(),
733 'flags': result.re.flags}
734 for i in range(1, 100):
735 try:
736 gi = result.group(i)
737 # Special hack because else the string concat fails:
738 if gi is None:
739 gi = "None"
740 except IndexError:
741 gi = "Error"
742 vardict['g%d' % i] = gi
743 for i in result.re.groupindex.keys():
744 try:
745 gi = result.group(i)
746 if gi is None:
747 gi = "None"
748 except IndexError:
749 gi = "Error"
750 vardict[i] = gi
751 repl = eval(repl, vardict)
752 if repl != expected:
753 print '=== grouping error', t,
754 print repr(repl) + ' should be ' + repr(expected)
755 else:
756 print '=== Failed incorrectly', t
757
758 # Try the match on a unicode string, and check that it
759 # still succeeds.
760 try:
761 result = obj.search(unicode(s, "latin-1"))
762 if result is None:
763 print '=== Fails on unicode match', t
764 except NameError:
765 continue # 1.5.2
766 except TypeError:
767 continue # unicode test case
768
769 # Try the match on a unicode pattern, and check that it
770 # still succeeds.
771 obj=re.compile(unicode(pattern, "latin-1"))
772 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000773 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000774 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000775
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000776 # Try the match with the search area limited to the extent
777 # of the match and see if it still succeeds. \B will
778 # break (because it won't match at the end or start of a
779 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000780
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000781 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
782 and result is not None:
783 obj = re.compile(pattern)
784 result = obj.search(s, result.start(0), result.end(0) + 1)
785 if result is None:
786 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000787
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000788 # Try the match with IGNORECASE enabled, and check that it
789 # still succeeds.
790 obj = re.compile(pattern, re.IGNORECASE)
791 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000792 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000793 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000794
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000795 # Try the match with LOCALE enabled, and check that it
796 # still succeeds.
797 obj = re.compile(pattern, re.LOCALE)
798 result = obj.search(s)
799 if result is None:
800 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000801
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000802 # Try the match with UNICODE locale enabled, and check
803 # that it still succeeds.
804 obj = re.compile(pattern, re.UNICODE)
805 result = obj.search(s)
806 if result is None:
807 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000808
def test_main():
    """Run the ReTests unittest cases, then the table-driven run_re_tests()."""
    # Unittest-style checks first.
    run_unittest(ReTests)
    # Then the print-based pattern table.
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000812
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()