blob: 7aa69969c189098283fedf5d5b4f83376721dffe [file] [log] [blame]
Guido van Rossum8430c581998-04-03 21:47:12 +00001import sys
Fred Drake8ae9ce52000-08-18 16:09:56 +00002sys.path = ['.'] + sys.path
Guido van Rossum8430c581998-04-03 21:47:12 +00003
Guido van Rossumaf554a02007-08-16 23:48:43 +00004from test.test_support import verbose, run_unittest, catch_warning
Guido van Rossum8e0ce301997-07-11 19:34:44 +00005import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00006from re import Scanner
Eric S. Raymond2846b0a2001-02-09 12:00:47 +00007import sys, os, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
Guido van Rossum23b22571997-07-17 22:36:14 +000010# Misc tests from Tim Peters' re.doc
11
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): read the whole match as an
    # integer and return that integer plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
Skip Montanaro8ed06da2003-04-24 19:43:18 +000042 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
Fredrik Lundh1151a8c2000-08-08 16:47:42 +000048
Skip Montanaro8ed06da2003-04-24 19:43:18 +000049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
Guido van Rossumdfa67901997-12-08 17:12:06 +000051
Skip Montanaro8ed06da2003-04-24 19:43:18 +000052 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
Guido van Rossum23b22571997-07-17 22:36:14 +000056
Skip Montanaro8ed06da2003-04-24 19:43:18 +000057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
Guido van Rossum49946571997-07-18 04:26:25 +000061
Skip Montanaro8ed06da2003-04-24 19:43:18 +000062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
Guido van Rossum95e80531997-08-13 22:34:14 +000067
Skip Montanaro8ed06da2003-04-24 19:43:18 +000068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
Thomas Wouters40a088d2008-03-18 20:19:54 +000086 def test_bug_1140(self):
87 # re.sub(x, y, b'') should return b'', not '', and
88 # re.sub(x, y, '') should return '', not b''.
89 # Also:
90 # re.sub(x, y, str(x)) should return str(y), and
91 # re.sub(x, y, bytes(x)) should return
92 # str(y) if isinstance(y, str) else unicode(y).
93 for x in 'x', b'x':
94 for y in 'y', b'y':
95 z = re.sub(x, y, b'')
96 self.assertEqual(z, b'')
97 self.assertEqual(type(z), bytes)
98 #
99 z = re.sub(x, y, '')
100 self.assertEqual(z, '')
101 self.assertEqual(type(z), str)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000102
Christian Heimes5fb7c2a2007-12-24 08:52:31 +0000103 def test_bug_1661(self):
104 # Verify that flags do not get silently ignored with compiled patterns
105 pattern = re.compile('.')
106 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
107 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
108 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
109 self.assertRaises(ValueError, re.compile, pattern, re.I)
110
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000111 def test_sub_template_numeric_escape(self):
112 # bug 776311 and friends
113 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
114 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
115 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
116 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
117 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
118 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
119 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
120
121 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
122 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
123
124 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
125 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
126 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
127 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
128 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
129
130 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
131 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
Tim Peters0e9980f2004-09-12 03:49:31 +0000132
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000133 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
134 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
135 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
136 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
137 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
138 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
139 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
140 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
141 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
142 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
143 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
144 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
145
146 # in python2.3 (etc), these loop endlessly in sre_parser.py
147 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
148 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
149 'xz8')
150 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
151 'xza')
152
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000153 def test_qualified_re_sub(self):
154 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
155 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000156
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000157 def test_bug_114660(self):
158 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
159 'hello there')
160
161 def test_bug_462270(self):
162 # Test for empty sub() behaviour, see SF bug #462270
163 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
164 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
165
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000166 def test_symbolic_refs(self):
167 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
168 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
169 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
170 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
171 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
172 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
173 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
174 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000175 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000176
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000177 def test_re_subn(self):
178 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
179 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
180 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
181 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
182 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000183
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000184 def test_re_split(self):
185 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
186 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
187 self.assertEqual(re.split("(:*)", ":a:b::c"),
188 ['', ':', 'a', ':', 'b', '::', 'c'])
189 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
190 self.assertEqual(re.split("(:)*", ":a:b::c"),
191 ['', ':', 'a', ':', 'b', ':', 'c'])
192 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
193 ['', ':', 'a', ':b::', 'c'])
194 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
195 ['', None, ':', 'a', None, ':', '', 'b', None, '',
196 None, '::', 'c'])
197 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
198 ['', 'a', '', '', 'c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000199
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000200 def test_qualified_re_split(self):
201 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
202 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
203 self.assertEqual(re.split("(:)", ":a:b::c", 2),
204 ['', ':', 'a', ':', 'b::c'])
205 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
206 ['', ':', 'a', ':', 'b::c'])
Guido van Rossum49946571997-07-18 04:26:25 +0000207
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000208 def test_re_findall(self):
209 self.assertEqual(re.findall(":+", "abc"), [])
210 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
211 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
212 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
213 (":", ":"),
214 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000215
Skip Montanaro5ba00542003-04-25 16:00:14 +0000216 def test_bug_117612(self):
217 self.assertEqual(re.findall(r"(a|(b))", "aba"),
218 [("a", ""),("b", "b"),("a", "")])
219
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000220 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000221 self.assertEqual(re.match('a', 'a').groups(), ())
222 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
223 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
224 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
225 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000226
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000227 pat = re.compile('((a)|(b))(c)?')
228 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
229 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
230 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
231 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
232 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000233
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000234 # A single group
235 m = re.match('(a)', 'a')
236 self.assertEqual(m.group(0), 'a')
237 self.assertEqual(m.group(0), 'a')
238 self.assertEqual(m.group(1), 'a')
239 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000240
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000241 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
242 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
243 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
244 (None, 'b', None))
245 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000246
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000247 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000248 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
249 ('(', 'a'))
250 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
251 (None, 'a'))
252 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
253 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
254 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
255 ('a', 'b'))
256 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
257 (None, 'd'))
258 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
259 (None, 'd'))
260 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
261 ('a', ''))
262
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000263 # Tests for bug #1177831: exercise groups other than the first group
264 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
265 self.assertEqual(p.match('abc').groups(),
266 ('a', 'b', 'c'))
267 self.assertEqual(p.match('ad').groups(),
268 ('a', None, 'd'))
269 self.assertEqual(p.match('abd'), None)
270 self.assertEqual(p.match('ac'), None)
271
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000272
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000273 def test_re_groupref(self):
274 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
275 ('|', 'a'))
276 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
277 (None, 'a'))
278 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
279 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
280 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
281 ('a', 'a'))
282 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
283 (None, None))
284
285 def test_groupdict(self):
286 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
287 'first second').groupdict(),
288 {'first':'first', 'second':'second'})
289
290 def test_expand(self):
291 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
292 "first second")
293 .expand(r"\2 \1 \g<second> \g<first>"),
294 "second first second first")
295
296 def test_repeat_minmax(self):
297 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
298 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
299 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
300 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
301
302 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
303 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
304 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
305 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
306 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
307 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
308 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
309 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
310
311 self.assertEqual(re.match("^x{1}$", "xxx"), None)
312 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
313 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
314 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
315
316 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
317 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
318 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
319 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
320 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
321 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
322 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
323 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
324
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000325 self.assertEqual(re.match("^x{}$", "xxx"), None)
326 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
327
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000328 def test_getattr(self):
329 self.assertEqual(re.match("(a)", "a").pos, 0)
330 self.assertEqual(re.match("(a)", "a").endpos, 1)
331 self.assertEqual(re.match("(a)", "a").string, "a")
332 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
333 self.assertNotEqual(re.match("(a)", "a").re, None)
334
335 def test_special_escapes(self):
336 self.assertEqual(re.search(r"\b(b.)\b",
337 "abcd abc bcd bx").group(1), "bx")
338 self.assertEqual(re.search(r"\B(b.)\B",
339 "abc bcd bc abxd").group(1), "bx")
340 self.assertEqual(re.search(r"\b(b.)\b",
341 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
342 self.assertEqual(re.search(r"\B(b.)\B",
343 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
344 self.assertEqual(re.search(r"\b(b.)\b",
345 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
346 self.assertEqual(re.search(r"\B(b.)\B",
347 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
348 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
349 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
350 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
351 self.assertEqual(re.search(r"\b(b.)\b",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000352 "abcd abc bcd bx").group(1), "bx")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000353 self.assertEqual(re.search(r"\B(b.)\B",
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000354 "abc bcd bc abxd").group(1), "bx")
355 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
356 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
357 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000358 self.assertEqual(re.search(r"\d\D\w\W\s\S",
359 "1aa! a").group(0), "1aa! a")
360 self.assertEqual(re.search(r"\d\D\w\W\s\S",
361 "1aa! a", re.LOCALE).group(0), "1aa! a")
362 self.assertEqual(re.search(r"\d\D\w\W\s\S",
363 "1aa! a", re.UNICODE).group(0), "1aa! a")
364
365 def test_ignore_case(self):
366 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000367 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000368
369 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000370 self.assertEqual(re.match("([\u2222\u2223])",
371 "\u2222").group(1), "\u2222")
372 self.assertEqual(re.match("([\u2222\u2223])",
373 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000374
375 def test_anyall(self):
376 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
377 "a\nb")
378 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
379 "a\n\nb")
380
381 def test_non_consuming(self):
382 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
383 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
384 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
385 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
386 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
387 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
388 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
389
390 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
391 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
392 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
393 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
394
395 def test_ignore_case(self):
396 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
397 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
398 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
399 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
400 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
401 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
402 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
403 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
404
405 def test_category(self):
406 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
407
408 def test_getlower(self):
409 import _sre
410 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
411 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
412 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
413
414 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000415 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000416
417 def test_not_literal(self):
418 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
419 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
420
421 def test_search_coverage(self):
422 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
423 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
424
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000425 def test_re_escape(self):
426 p=""
427 for i in range(0, 256):
428 p = p + chr(i)
429 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
430 True)
431 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000432
Skip Montanaro1e703c62003-04-25 15:40:28 +0000433 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000434 self.assertEqual(pat.match(p) is not None, True)
435 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000436
def test_pickling(self):
    # Round-trip a compiled pattern through pickle, then check that the
    # legacy reconstructor can still be imported.
    import pickle
    import warnings
    self.pickle_test(pickle)
    # old pickles expect the _compile() reconstructor in sre module
    with catch_warning():
        # Importing sre emits a DeprecationWarning; keep it quiet.
        warnings.filterwarnings("ignore",
                                message="The sre module is deprecated",
                                category=DeprecationWarning)
        from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000446
def pickle_test(self, pickle):
    # Dump and reload a compiled pattern with the given pickle module;
    # the reloaded object must compare equal to the original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000452
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000453 def test_constants(self):
454 self.assertEqual(re.I, re.IGNORECASE)
455 self.assertEqual(re.L, re.LOCALE)
456 self.assertEqual(re.M, re.MULTILINE)
457 self.assertEqual(re.S, re.DOTALL)
458 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000459
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000460 def test_flags(self):
Skip Montanaro1e703c62003-04-25 15:40:28 +0000461 for flag in [re.I, re.M, re.X, re.S, re.L]:
462 self.assertNotEqual(re.compile('^pattern$', flag), None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000463
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000464 def test_sre_character_literals(self):
465 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
466 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
467 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
468 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
469 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
470 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
471 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
472 self.assertRaises(re.error, re.match, "\911", "")
473
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000474 def test_sre_character_class_literals(self):
475 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
476 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
477 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
478 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
479 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
480 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
481 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
482 self.assertRaises(re.error, re.match, "[\911]", "")
483
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000484 def test_bug_113254(self):
485 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
486 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
487 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
488
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000489 def test_bug_527371(self):
490 # bug described in patches 527371/672491
491 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
492 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
493 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
494 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
495 self.assertEqual(re.match("((a))", "a").lastindex, 1)
496
497 def test_bug_545855(self):
498 # bug 545855 -- This pattern failed to cause a compile error as it
499 # should, instead provoking a TypeError.
500 self.assertRaises(re.error, re.compile, 'foo[a-')
501
502 def test_bug_418626(self):
503 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
504 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
505 # pattern '*?' on a long string.
506 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
507 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
508 20003)
509 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000510 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000511 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000512 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000513
514 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000515 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000516 self.assertEqual(re.compile(pat) and 1, 1)
517
Skip Montanaro1e703c62003-04-25 15:40:28 +0000518 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000519 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000520 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000521 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
522 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
523 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000524
525 def test_scanner(self):
526 def s_ident(scanner, token): return token
527 def s_operator(scanner, token): return "op%s" % token
528 def s_float(scanner, token): return float(token)
529 def s_int(scanner, token): return int(token)
530
531 scanner = Scanner([
532 (r"[a-zA-Z_]\w*", s_ident),
533 (r"\d+\.\d*", s_float),
534 (r"\d+", s_int),
535 (r"=|\+|-|\*|/", s_operator),
536 (r"\s+", None),
537 ])
538
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000539 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
540
Skip Montanaro1e703c62003-04-25 15:40:28 +0000541 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
542 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
543 'op+', 'bar'], ''))
544
Skip Montanaro5ba00542003-04-25 16:00:14 +0000545 def test_bug_448951(self):
546 # bug 448951 (similar to 429357, but with single char match)
547 # (Also test greedy matches.)
548 for op in '','?','*':
549 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
550 (None, None))
551 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
552 ('a:', 'a'))
553
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000554 def test_bug_725106(self):
555 # capturing groups in alternatives in repeats
556 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
557 ('b', 'a'))
558 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
559 ('c', 'b'))
560 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
561 ('b', None))
562 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
563 ('b', None))
564 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
565 ('b', 'a'))
566 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
567 ('c', 'b'))
568 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
569 ('b', None))
570 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
571 ('b', None))
572
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000573 def test_bug_725149(self):
574 # mark_stack_base restoring before restoring marks
575 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
576 ('a', None))
577 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
578 ('a', None, None))
579
Just van Rossum12723ba2003-07-02 20:03:04 +0000580 def test_bug_764548(self):
581 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000582 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000583 pat = re.compile(my_unicode("abc"))
584 self.assertEqual(pat.match("xyz"), None)
585
Skip Montanaro5ba00542003-04-25 16:00:14 +0000586 def test_finditer(self):
587 iter = re.finditer(r":+", "a:b::c:::d")
588 self.assertEqual([item.group(0) for item in iter],
589 [":", "::", ":::"])
590
Thomas Wouters40a088d2008-03-18 20:19:54 +0000591 def test_bug_926075(self):
592 self.assert_(re.compile('bug_926075') is not
593 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000594
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000595 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000596 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000597 self.assertEqual(re.compile(pattern).split("a.b.c"),
598 ['a','b','c'])
599
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000600 def test_bug_581080(self):
601 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000602 self.assertEqual(next(iter).span(), (1,2))
603 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000604
605 scanner = re.compile(r"\s").scanner("a b")
606 self.assertEqual(scanner.search().span(), (1, 2))
607 self.assertEqual(scanner.search(), None)
608
609 def test_bug_817234(self):
610 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000611 self.assertEqual(next(iter).span(), (0, 4))
612 self.assertEqual(next(iter).span(), (4, 4))
613 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000614
Guido van Rossumd8faa362007-04-27 19:54:29 +0000615 def test_empty_array(self):
616 # SF buf 1647541
617 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000618 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000619 a = array.array(typecode)
620 self.assertEqual(re.compile("bla").match(a), None)
621 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000622
Christian Heimes072c0f12008-01-03 23:01:04 +0000623 def test_inline_flags(self):
624 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000625 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
626 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000627
628 p = re.compile(upper_char, re.I | re.U)
629 q = p.match(lower_char)
630 self.assertNotEqual(q, None)
631
632 p = re.compile(lower_char, re.I | re.U)
633 q = p.match(upper_char)
634 self.assertNotEqual(q, None)
635
636 p = re.compile('(?i)' + upper_char, re.U)
637 q = p.match(lower_char)
638 self.assertNotEqual(q, None)
639
640 p = re.compile('(?i)' + lower_char, re.U)
641 q = p.match(upper_char)
642 self.assertNotEqual(q, None)
643
644 p = re.compile('(?iu)' + upper_char)
645 q = p.match(lower_char)
646 self.assertNotEqual(q, None)
647
648 p = re.compile('(?iu)' + lower_char)
649 q = p.match(upper_char)
650 self.assertNotEqual(q, None)
651
Christian Heimes25bb7832008-01-11 16:17:00 +0000652 def test_dollar_matches_twice(self):
653 "$ matches the end of string, and just before the terminating \n"
654 pattern = re.compile('$')
655 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
656 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
657 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
658
659 pattern = re.compile('$', re.MULTILINE)
660 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
661 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
662 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
663
Christian Heimes072c0f12008-01-03 23:01:04 +0000664
def _re_test_namespace(result):
    """Build the namespace in which a test tuple's ``repl`` expression is evaluated."""
    def group_text(key):
        # Unmatched groups become "None" and nonexistent groups "Error" so
        # that string concatenation in the expected-result expressions works.
        try:
            value = result.group(key)
        except IndexError:
            return "Error"
        return "None" if value is None else value

    namespace = {'found': result.group(0),
                 'groups': result.group(),
                 'flags': result.re.flags}
    for i in range(1, 100):
        namespace['g%d' % i] = group_text(i)
    for name in result.re.groupindex:
        namespace[name] = group_text(name)
    return namespace


def run_re_tests():
    """Run the table-driven cases from ``test.re_tests``, printing any discrepancy.

    Each entry of ``tests`` is ``(pattern, subject, outcome)`` or
    ``(pattern, subject, outcome, repl, expected)``.  Problems are reported
    by printing a line starting with ``===`` (or ``***`` for an unexpected
    compile error); nothing is returned.

    For cases expected to SUCCEED, the match is repeated with the pattern
    and subject encoded to bytes (also under LOCALE), with the search window
    limited to the match, and with IGNORECASE and UNICODE enabled.

    Raises:
        ValueError: if a test tuple does not have 3 or 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses, so
            # they propagate untouched with their original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # No result for this case; do not fall through and inspect a
            # stale (or unbound) `result`.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is None:
            print('=== Failed incorrectly', t)
        else:
            # Matched, as expected, so now we compute the result string and
            # compare it to our expected result.  `repl` comes from the
            # test.re_tests data table, not from external input.
            repl = eval(repl, _re_test_namespace(result))
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))

        # Try the match with both pattern and subject converted to bytes,
        # and check that it still succeeds.  (str(s, "latin-1") cannot be
        # used for this: it raises TypeError for a str argument, which used
        # to skip every check below.)
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except (UnicodeEncodeError, TypeError):
            # Non-ASCII (or non-str) case: no bytes equivalent to try.
            pass
        else:
            try:
                bytes_obj = re.compile(bpat)
                # LOCALE is only accepted together with bytes patterns.
                locale_obj = re.compile(bpat, re.LOCALE)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                if bytes_obj.search(bs) is None:
                    print('=== Fails on bytes pattern match', t)
                if locale_obj.search(bs) is None:
                    print('=== Fails on locale-sensitive match', t)

        # Try the match with the search area limited to the extent of the
        # match and see if it still succeeds.  \B will break (because it
        # won't match at the end or start of a string), so we'll ignore
        # patterns that feature it.
        if (result is not None
                and pattern[:2] != '\\B' and pattern[-2:] != '\\B'):
            if obj.search(s, result.start(0), result.end(0) + 1) is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it still
        # succeeds.
        if re.compile(pattern, re.IGNORECASE).search(s) is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with UNICODE enabled, and check that it still
        # succeeds.
        if re.compile(pattern, re.UNICODE).search(s) is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000788
def test_main():
    """Run the ReTests unit tests, then the table-driven re_tests cases."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000792
# Allow running this test module directly as a script.
if __name__ == '__main__':
    test_main()