blob: 96a83b88c1813836f590639a3b2e2b021e5e549b [file] [log] [blame]
Brett Cannon1cd02472008-09-09 01:52:27 +00001from test.support import verbose, run_unittest
Guido van Rossum8e0ce301997-07-11 19:34:44 +00002import re
Thomas Wouters9ada3d62006-04-21 09:47:09 +00003from re import Scanner
Georg Brandl1b37e872010-03-14 10:45:50 +00004import sys, traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00005from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00006
Guido van Rossum23b22571997-07-17 22:36:14 +00007# Misc tests from Tim Peters' re.doc
8
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
12
Skip Montanaro8ed06da2003-04-24 19:43:18 +000013import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000014
Skip Montanaro8ed06da2003-04-24 19:43:18 +000015class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000016
17 def test_weakref(self):
18 s = 'QabbbcR'
19 x = re.compile('ab+c')
20 y = proxy(x)
21 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
22
Skip Montanaro8ed06da2003-04-24 19:43:18 +000023 def test_search_star_plus(self):
24 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
25 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
26 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
27 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000028 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
30 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
31 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
32 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000033 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000034
def bump_num(self, matchobj):
    # Replacement callback: parse the whole match as an integer and return
    # the text of that integer plus one.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000038
def test_basic_re_sub(self):
    # re.sub() with plain, callable and group-reference replacements.
    # Templates and patterns containing backslashes are raw strings so that
    # no invalid string-literal escape (\g, \s) is relied upon.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')

    # Callable replacement: unlimited, then limited to 3 substitutions.
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted as is; a template string has
    # its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Named and numbered \g<...> references.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(
        re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
        '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000066
Skip Montanaro2726fcd2003-04-25 14:31:54 +000067 def test_bug_449964(self):
68 # fails for group followed by other escape
69 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
70 'xx\bxx\b')
71
72 def test_bug_449000(self):
73 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000074 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
75 'abc\ndef\n')
76 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n')
78 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n')
80 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000082
Christian Heimes5fb7c2a2007-12-24 08:52:31 +000083 def test_bug_1661(self):
84 # Verify that flags do not get silently ignored with compiled patterns
85 pattern = re.compile('.')
86 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
87 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
88 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
89 self.assertRaises(ValueError, re.compile, pattern, re.I)
90
Guido van Rossum92f8f3e2008-09-10 14:30:50 +000091 def test_bug_3629(self):
92 # A regex that triggered a bug in the sre-code validator
93 re.compile("(?P<quote>)(?(quote))")
94
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # Each pair is (template, expected result of replacing 'x' in 'x').
    literal_cases = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),

        (r'\400', '\0'),
        (r'\777', '\377'),
    ]
    for template, expected in literal_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Templates for which sub() must raise re.error (the pattern 'x'
    # defines no groups).
    invalid_templates = [
        r'\1',
        r'\8',
        r'\9',
        r'\11',
        r'\18',
        r'\1a',
        r'\90',
        r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in invalid_templates:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    ten_groups_then_one = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(ten_groups_then_one, r'\118', 'xyz'), 'xz8')
    self.assertEqual(re.sub(ten_groups_then_one, r'\11a', 'xyz'), 'xza')
136
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000137 def test_qualified_re_sub(self):
138 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
139 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000140
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000141 def test_bug_114660(self):
142 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
143 'hello there')
144
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    subject = 'abxd'
    for pattern, expected in (('x*', '-a-b-d-'), ('x+', 'ab-d')):
        self.assertEqual(re.sub(pattern, '-', subject), expected)
149
def test_symbolic_refs(self):
    # Malformed or unusable group references in a sub() template must be
    # rejected.  Templates are raw strings so that no invalid
    # string-literal escape (\g) is relied upon.
    named = '(?P<a>x)'
    either = '(?P<a>x)|(?P<b>y)'

    for template in (r'\g<a', r'\g<', r'\g', r'\g<a a>', r'\g<1a1>'):
        self.assertRaises(re.error, re.sub, named, template, 'xx')

    # A well-formed reference to a name the pattern does not define is
    # expected to raise IndexError rather than re.error.
    self.assertRaises(IndexError, re.sub, named, r'\g<ab>', 'xx')

    # References to the second alternative's group, by name and by number.
    for template in (r'\g<b>', r'\2'):
        self.assertRaises(re.error, re.sub, either, template, 'xx')

    self.assertRaises(re.error, re.sub, named, r'\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000160
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000161 def test_re_subn(self):
162 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
163 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
164 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
165 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
166 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000167
def test_re_split(self):
    # split() of one subject with a range of separator patterns.
    # Each pair is (pattern, expected list of pieces).
    subject = ":a:b::c"
    cases = [
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]
    for pattern, expected in cases:
        self.assertEqual(re.split(pattern, subject), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000183
def test_qualified_re_split(self):
    # split() with the split limit passed as third positional argument (2).
    # Each entry: (pattern, subject, expected list of pieces).
    cases = [
        (":", ":a:b::c", ['', 'a', 'b::c']),
        (':', 'a:b:c:d', ['a', 'b', 'c:d']),
        ("(:)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
        ("(:*)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
    ]
    for pattern, subject, expected in cases:
        self.assertEqual(re.split(pattern, subject, 2), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000191
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000192 def test_re_findall(self):
193 self.assertEqual(re.findall(":+", "abc"), [])
194 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
195 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
196 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
197 (":", ":"),
198 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000199
Skip Montanaro5ba00542003-04-25 16:00:14 +0000200 def test_bug_117612(self):
201 self.assertEqual(re.findall(r"(a|(b))", "aba"),
202 [("a", ""),("b", "b"),("a", "")])
203
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000204 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000205 self.assertEqual(re.match('a', 'a').groups(), ())
206 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
207 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
208 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
209 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000210
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000211 pat = re.compile('((a)|(b))(c)?')
212 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
213 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
214 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
215 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
216 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000217
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000218 # A single group
219 m = re.match('(a)', 'a')
220 self.assertEqual(m.group(0), 'a')
221 self.assertEqual(m.group(0), 'a')
222 self.assertEqual(m.group(1), 'a')
223 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000224
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000225 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
226 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
227 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
228 (None, 'b', None))
229 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000230
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000231 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000232 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
233 ('(', 'a'))
234 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
235 (None, 'a'))
236 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
237 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
238 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
239 ('a', 'b'))
240 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
241 (None, 'd'))
242 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
243 (None, 'd'))
244 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
245 ('a', ''))
246
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000247 # Tests for bug #1177831: exercise groups other than the first group
248 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
249 self.assertEqual(p.match('abc').groups(),
250 ('a', 'b', 'c'))
251 self.assertEqual(p.match('ad').groups(),
252 ('a', None, 'd'))
253 self.assertEqual(p.match('abd'), None)
254 self.assertEqual(p.match('ac'), None)
255
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000256
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000257 def test_re_groupref(self):
258 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
259 ('|', 'a'))
260 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
261 (None, 'a'))
262 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
263 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
264 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
265 ('a', 'a'))
266 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
267 (None, None))
268
269 def test_groupdict(self):
270 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
271 'first second').groupdict(),
272 {'first':'first', 'second':'second'})
273
274 def test_expand(self):
275 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
276 "first second")
277 .expand(r"\2 \1 \g<second> \g<first>"),
278 "second first second first")
279
280 def test_repeat_minmax(self):
281 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
282 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
283 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
284 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
285
286 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
287 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
288 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
289 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
290 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
291 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
292 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
293 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
294
295 self.assertEqual(re.match("^x{1}$", "xxx"), None)
296 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
297 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
298 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
299
300 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
301 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
302 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
303 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
304 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
305 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
306 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
307 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
308
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000309 self.assertEqual(re.match("^x{}$", "xxx"), None)
310 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
311
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000312 def test_getattr(self):
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000313 self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
Antoine Pitroufd036452008-08-19 17:56:33 +0000314 self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +0000315 self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
316 self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
317 self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
318 {'first': 1, 'other': 2})
319
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000320 self.assertEqual(re.match("(a)", "a").pos, 0)
321 self.assertEqual(re.match("(a)", "a").endpos, 1)
322 self.assertEqual(re.match("(a)", "a").string, "a")
323 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
324 self.assertNotEqual(re.match("(a)", "a").re, None)
325
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, with no flag and with
    # re.LOCALE / re.UNICODE.  Five assertions that were verbatim repeats
    # of earlier ones have been dropped; every distinct check is kept.
    flag_variants = (0, re.LOCALE, re.UNICODE)  # 0 means "no flags"

    for flags in flag_variants:
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", flags).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", flags).group(1), "bx")

    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)

    for flags in flag_variants:
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", flags).group(0), "1aa! a")
355
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000356 def test_bigcharset(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000357 self.assertEqual(re.match("([\u2222\u2223])",
358 "\u2222").group(1), "\u2222")
359 self.assertEqual(re.match("([\u2222\u2223])",
360 "\u2222", re.UNICODE).group(1), "\u2222")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000361
362 def test_anyall(self):
363 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
364 "a\nb")
365 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
366 "a\n\nb")
367
368 def test_non_consuming(self):
369 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
370 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
371 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
372 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
373 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
374 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
375 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
376
377 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
378 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
379 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
380 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
381
382 def test_ignore_case(self):
Benjamin Petersona786b022008-08-25 21:05:21 +0000383 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
384 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000385 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
386 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
387 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
388 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
389 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
390 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
391 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
392 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
393
394 def test_category(self):
395 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
396
def test_getlower(self):
    # _sre.getlower() must map 'A' to 'a' with no flag, re.LOCALE and
    # re.UNICODE.  The final case-insensitive match used to be asserted
    # twice verbatim; it is kept once.
    import _sre
    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(_sre.getlower(ord('A'), flags), ord('a'))

    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000405
406 def test_not_literal(self):
407 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
408 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
409
410 def test_search_coverage(self):
411 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
412 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
413
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000414 def test_re_escape(self):
415 p=""
Guido van Rossum698280d2008-09-10 17:44:35 +0000416 self.assertEqual(re.escape(p), p)
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000417 for i in range(0, 256):
418 p = p + chr(i)
419 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
420 True)
421 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
Guido van Rossum49946571997-07-18 04:26:25 +0000422
Skip Montanaro1e703c62003-04-25 15:40:28 +0000423 pat=re.compile(re.escape(p))
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000424 self.assertEqual(pat.match(p) is not None, True)
425 self.assertEqual(pat.match(p).span(), (0,256))
Guido van Rossum49946571997-07-18 04:26:25 +0000426
Guido van Rossum698280d2008-09-10 17:44:35 +0000427 def test_re_escape_byte(self):
428 p=b""
429 self.assertEqual(re.escape(p), p)
430 for i in range(0, 256):
431 b = bytes([i])
432 p += b
433 self.assertEqual(re.match(re.escape(b), b) is not None, True)
434 self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
435
436 pat=re.compile(re.escape(p))
437 self.assertEqual(pat.match(p) is not None, True)
438 self.assertEqual(pat.match(p).span(), (0,256))
439
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle module's
    # dumps()/loads() and require the result to compare equal to the
    # original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000445
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000446 def test_constants(self):
447 self.assertEqual(re.I, re.IGNORECASE)
448 self.assertEqual(re.L, re.LOCALE)
449 self.assertEqual(re.M, re.MULTILINE)
450 self.assertEqual(re.S, re.DOTALL)
451 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000452
def test_flags(self):
    # Compiling with each single flag must produce a pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertNotEqual(compiled, None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000456
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000457 def test_sre_character_literals(self):
458 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
459 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
460 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
461 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
462 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
463 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
464 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
465 self.assertRaises(re.error, re.match, "\911", "")
466
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000467 def test_sre_character_class_literals(self):
468 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
469 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
470 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
471 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
472 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
473 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
474 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
475 self.assertRaises(re.error, re.match, "[\911]", "")
476
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000477 def test_bug_113254(self):
478 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
479 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
480 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
481
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000482 def test_bug_527371(self):
483 # bug described in patches 527371/672491
484 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
485 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
486 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
487 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
488 self.assertEqual(re.match("((a))", "a").lastindex, 1)
489
490 def test_bug_545855(self):
491 # bug 545855 -- This pattern failed to cause a compile error as it
492 # should, instead provoking a TypeError.
493 self.assertRaises(re.error, re.compile, 'foo[a-')
494
495 def test_bug_418626(self):
496 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
497 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
498 # pattern '*?' on a long string.
499 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
500 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
501 20003)
502 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000503 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000504 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000505 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000506
507 def test_bug_612074(self):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000508 pat="["+re.escape("\u2039")+"]"
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000509 self.assertEqual(re.compile(pat) and 1, 1)
510
Skip Montanaro1e703c62003-04-25 15:40:28 +0000511 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000512 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000513 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000514 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
515 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
516 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000517
518 def test_scanner(self):
519 def s_ident(scanner, token): return token
520 def s_operator(scanner, token): return "op%s" % token
521 def s_float(scanner, token): return float(token)
522 def s_int(scanner, token): return int(token)
523
524 scanner = Scanner([
525 (r"[a-zA-Z_]\w*", s_ident),
526 (r"\d+\.\d*", s_float),
527 (r"\d+", s_int),
528 (r"=|\+|-|\*|/", s_operator),
529 (r"\s+", None),
530 ])
531
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000532 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
533
Skip Montanaro1e703c62003-04-25 15:40:28 +0000534 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
535 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
536 'op+', 'bar'], ''))
537
Skip Montanaro5ba00542003-04-25 16:00:14 +0000538 def test_bug_448951(self):
539 # bug 448951 (similar to 429357, but with single char match)
540 # (Also test greedy matches.)
541 for op in '','?','*':
542 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
543 (None, None))
544 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
545 ('a:', 'a'))
546
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000547 def test_bug_725106(self):
548 # capturing groups in alternatives in repeats
549 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
550 ('b', 'a'))
551 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
552 ('c', 'b'))
553 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
554 ('b', None))
555 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
556 ('b', None))
557 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
558 ('b', 'a'))
559 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
560 ('c', 'b'))
561 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
562 ('b', None))
563 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
564 ('b', None))
565
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000566 def test_bug_725149(self):
567 # mark_stack_base restoring before restoring marks
568 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
569 ('a', None))
570 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
571 ('a', None, None))
572
Just van Rossum12723ba2003-07-02 20:03:04 +0000573 def test_bug_764548(self):
574 # bug 764548, re.compile() barfs on str/unicode subclasses
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000575 class my_unicode(str): pass
Just van Rossum12723ba2003-07-02 20:03:04 +0000576 pat = re.compile(my_unicode("abc"))
577 self.assertEqual(pat.match("xyz"), None)
578
Skip Montanaro5ba00542003-04-25 16:00:14 +0000579 def test_finditer(self):
580 iter = re.finditer(r":+", "a:b::c:::d")
581 self.assertEqual([item.group(0) for item in iter],
582 [":", "::", ":::"])
583
Thomas Wouters40a088d2008-03-18 20:19:54 +0000584 def test_bug_926075(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000585 self.assertTrue(re.compile('bug_926075') is not
Thomas Wouters40a088d2008-03-18 20:19:54 +0000586 re.compile(b'bug_926075'))
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000587
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000588 def test_bug_931848(self):
Guido van Rossum7ebb9702007-05-15 21:39:58 +0000589 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000590 self.assertEqual(re.compile(pattern).split("a.b.c"),
591 ['a','b','c'])
592
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000593 def test_bug_581080(self):
594 iter = re.finditer(r"\s", "a b")
Georg Brandla18af4e2007-04-21 15:47:16 +0000595 self.assertEqual(next(iter).span(), (1,2))
596 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000597
598 scanner = re.compile(r"\s").scanner("a b")
599 self.assertEqual(scanner.search().span(), (1, 2))
600 self.assertEqual(scanner.search(), None)
601
602 def test_bug_817234(self):
603 iter = re.finditer(r".*", "asdf")
Georg Brandla18af4e2007-04-21 15:47:16 +0000604 self.assertEqual(next(iter).span(), (0, 4))
605 self.assertEqual(next(iter).span(), (4, 4))
606 self.assertRaises(StopIteration, next, iter)
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000607
Mark Dickinson1f268282009-07-28 17:22:36 +0000608 def test_bug_6561(self):
609 # '\d' should match characters in Unicode category 'Nd'
610 # (Number, Decimal Digit), but not those in 'Nl' (Number,
611 # Letter) or 'No' (Number, Other).
612 decimal_digits = [
613 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
614 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
615 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
616 ]
617 for x in decimal_digits:
618 self.assertEqual(re.match('^\d$', x).group(0), x)
619
620 not_decimal_digits = [
621 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
622 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
623 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
624 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
625 ]
626 for x in not_decimal_digits:
627 self.assertIsNone(re.match('^\d$', x))
628
Guido van Rossumd8faa362007-04-27 19:54:29 +0000629 def test_empty_array(self):
630 # SF buf 1647541
631 import array
Guido van Rossum166746c2007-07-03 15:39:16 +0000632 for typecode in 'bBuhHiIlLfd':
Guido van Rossumd8faa362007-04-27 19:54:29 +0000633 a = array.array(typecode)
Antoine Pitroufd036452008-08-19 17:56:33 +0000634 self.assertEqual(re.compile(b"bla").match(a), None)
635 self.assertEqual(re.compile(b"").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000636
Christian Heimes072c0f12008-01-03 23:01:04 +0000637 def test_inline_flags(self):
638 # Bug #1700
Christian Heimes2e1d0f02008-01-04 00:47:51 +0000639 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
640 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
Christian Heimes072c0f12008-01-03 23:01:04 +0000641
642 p = re.compile(upper_char, re.I | re.U)
643 q = p.match(lower_char)
644 self.assertNotEqual(q, None)
645
646 p = re.compile(lower_char, re.I | re.U)
647 q = p.match(upper_char)
648 self.assertNotEqual(q, None)
649
650 p = re.compile('(?i)' + upper_char, re.U)
651 q = p.match(lower_char)
652 self.assertNotEqual(q, None)
653
654 p = re.compile('(?i)' + lower_char, re.U)
655 q = p.match(upper_char)
656 self.assertNotEqual(q, None)
657
658 p = re.compile('(?iu)' + upper_char)
659 q = p.match(lower_char)
660 self.assertNotEqual(q, None)
661
662 p = re.compile('(?iu)' + lower_char)
663 q = p.match(upper_char)
664 self.assertNotEqual(q, None)
665
Christian Heimes25bb7832008-01-11 16:17:00 +0000666 def test_dollar_matches_twice(self):
667 "$ matches the end of string, and just before the terminating \n"
668 pattern = re.compile('$')
669 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
670 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
671 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
672
673 pattern = re.compile('$', re.MULTILINE)
674 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
675 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
676 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
677
Antoine Pitroufd036452008-08-19 17:56:33 +0000678 def test_bytes_str_mixing(self):
679 # Mixing str and bytes is disallowed
680 pat = re.compile('.')
681 bpat = re.compile(b'.')
682 self.assertRaises(TypeError, pat.match, b'b')
683 self.assertRaises(TypeError, bpat.match, 'b')
684 self.assertRaises(TypeError, pat.sub, b'b', 'c')
685 self.assertRaises(TypeError, pat.sub, 'b', b'c')
686 self.assertRaises(TypeError, pat.sub, b'b', b'c')
687 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
688 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
689 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
690
691 def test_ascii_and_unicode_flag(self):
692 # String patterns
693 for flags in (0, re.UNICODE):
694 pat = re.compile('\xc0', flags | re.IGNORECASE)
695 self.assertNotEqual(pat.match('\xe0'), None)
696 pat = re.compile('\w', flags)
697 self.assertNotEqual(pat.match('\xe0'), None)
698 pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
699 self.assertEqual(pat.match('\xe0'), None)
700 pat = re.compile('(?a)\xc0', re.IGNORECASE)
701 self.assertEqual(pat.match('\xe0'), None)
702 pat = re.compile('\w', re.ASCII)
703 self.assertEqual(pat.match('\xe0'), None)
704 pat = re.compile('(?a)\w')
705 self.assertEqual(pat.match('\xe0'), None)
706 # Bytes patterns
707 for flags in (0, re.ASCII):
708 pat = re.compile(b'\xc0', re.IGNORECASE)
709 self.assertEqual(pat.match(b'\xe0'), None)
710 pat = re.compile(b'\w')
711 self.assertEqual(pat.match(b'\xe0'), None)
712 # Incompatibilities
713 self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
714 self.assertRaises(ValueError, re.compile, b'(?u)\w')
715 self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
716 self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
717 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
718 self.assertRaises(ValueError, re.compile, '(?au)\w')
719
Ezio Melottib92ed7c2010-03-06 15:24:08 +0000720 def test_bug_6509(self):
721 # Replacement strings of both types must parse properly.
722 # all strings
723 pat = re.compile('a(\w)')
724 self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
725 pat = re.compile('a(.)')
726 self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
727 pat = re.compile('..')
728 self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
729
730 # all bytes
731 pat = re.compile(b'a(\w)')
732 self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
733 pat = re.compile(b'a(.)')
734 self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
735 pat = re.compile(b'..')
736 self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
737
    def test_dealloc(self):
        """Feed invalid arguments to the matching engine and expect clean errors."""
        # issue 3299: check for segfault in debug build
        import _sre
        # the overflow limit is different on wide and narrow builds and it
        # depends on the definition of SRE_CODE (see sre.h).
        # 2**128 should be big enough to overflow on both. For smaller values
        # a RuntimeError is raised instead of OverflowError.
        long_overflow = 2**128
        # A dict is not a valid subject string.
        self.assertRaises(TypeError, re.finditer, "a", {})
        # NOTE(review): _sre is a private C module; the three-argument
        # compile() calls below assume the signature in use when this test
        # was written -- confirm against the targeted interpreter.
        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
        self.assertRaises(TypeError, _sre.compile, {}, 0, [])
Christian Heimes072c0f12008-01-03 23:01:04 +0000749
def _group_or_marker(match, key):
    """Return match.group(key) in a form usable by the eval'd result expression.

    A group that did not participate in the match (None) becomes the string
    "None"; a group the pattern does not define (IndexError) becomes "Error".
    """
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    # Special hack because else the string concat fails:
    return "None" if value is None else value


def run_re_tests():
    """Run the table-driven tests from test.re_tests and print every discrepancy.

    Each table entry is either a 3-tuple (pattern, string, outcome) or a
    5-tuple (pattern, string, outcome, repl, expected).  For entries whose
    outcome is SUCCEED, `repl` is evaluated as an expression over the match
    results and compared with `expected`; the search is then repeated with
    bytes arguments, with a range-limited search, and with the IGNORECASE,
    LOCALE and UNICODE flags.

    Problems are reported with print(); nothing is returned.

    Raises:
        ValueError: if a table entry has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception subclasses,
            # so they propagate untouched.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no result to inspect; do not fall through and use an
            # unbound (or stale, from the previous entry) `result`.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is None:
            print('=== Failed incorrectly', t)
        else:
            # Matched, as expected, so now we compute the
            # result string and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = _group_or_marker(result, i)
            for name in result.re.groupindex:
                vardict[name] = _group_or_marker(result, name)
            # `repl` comes from the test table itself, not from external
            # input; eval() must never be fed untrusted data.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                bytes_result = bpat.search(bs)
                if bytes_result is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE enabled, and check that it
        # still succeeds.
        obj = re.compile(pattern, re.IGNORECASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        # NOTE(review): newer Python versions reject re.LOCALE for str
        # patterns -- confirm against the targeted interpreter.
        if '(?u)' not in pattern:
            obj = re.compile(pattern, re.LOCALE)
            result = obj.search(s)
            if result is None:
                print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = re.compile(pattern, re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000876
Gregory P. Smith5a631832010-07-27 05:31:29 +0000877
def test_main():
    """Run the ReTests unittest case, then the table-driven run_re_tests() checks."""
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000881
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()