blob: f285c6bed2ca0032855d3b0ff171effb1a520c15 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Serhiy Storchaka7644ff12014-09-14 17:40:44 +03003from test.test_support import captured_stdout, have_unicode, requires_unicode, u
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00005from re import Scanner
R David Murray60773392013-04-14 13:08:50 -04006import sre_constants
Ezio Melotti46645632011-03-25 14:50:52 +02007import sys
8import string
9import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +000010from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000011
Antoine Pitrou735f36e2012-12-03 20:53:12 +010012
# Misc tests from Tim Peters' re.doc

# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
18
Skip Montanaro8ed06da2003-04-24 19:43:18 +000019import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000020
Skip Montanaro8ed06da2003-04-24 19:43:18 +000021class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000022
23 def test_weakref(self):
24 s = 'QabbbcR'
25 x = re.compile('ab+c')
26 y = proxy(x)
27 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
28
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029 def test_search_star_plus(self):
30 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
31 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
32 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
33 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030034 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000035 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
36 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
37 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
38 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030039 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000040
def bump_num(self, matchobj):
    # Replacement callback for re.sub(): read the whole match as an
    # integer and return that value plus one, as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000044
def test_basic_re_sub(self):
    # Core re.sub() behaviour: inline flags, callable replacements, the
    # optional count argument and escape handling in templates.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # The string returned by a callable is inserted as-is, while a
    # template string has its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Group references by name and by number.  The templates are raw
    # strings so that '\g' is not an invalid Python escape sequence.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000072
Skip Montanaro2726fcd2003-04-25 14:31:54 +000073 def test_bug_449964(self):
74 # fails for group followed by other escape
75 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
76 'xx\bxx\b')
77
78 def test_bug_449000(self):
79 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000080 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
84 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
85 'abc\ndef\n')
86 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
87 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000088
@requires_unicode
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    for x in 'x', u'x':
        for y in 'y', u'y':
            # (subject, expected value, expected result type)
            checks = (
                (u'', u'', unicode),
                ('', '', str),
                (unicode(x), y, unicode),
                (str(x), y, type(y)),
            )
            for subject, expected, expected_type in checks:
                z = re.sub(x, y, subject)
                self.assertEqual(z, expected)
                self.assertEqual(type(z), expected_type)
114
Raymond Hettinger80016c92007-12-19 18:13:31 +0000115 def test_bug_1661(self):
116 # Verify that flags do not get silently ignored with compiled patterns
117 pattern = re.compile('.')
118 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
119 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
120 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
121 self.assertRaises(ValueError, re.compile, pattern, re.I)
122
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000123 def test_bug_3629(self):
124 # A regex that triggered a bug in the sre-code validator
125 re.compile("(?P<quote>)(?(quote))")
126
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # Templates whose numeric escape is expected to yield literal
    # characters, paired with the expected result of substituting 'x'.
    literal_cases = (
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),

        (r'\400', '\0'),
        (r'\777', '\377'),
    )
    for template, expected in literal_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Templates that must be rejected with re.error for the pattern 'x'.
    rejected = (
        r'\1',
        r'\8',
        r'\9',
        r'\11',
        r'\18',
        r'\1a',
        r'\90',
        r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    )
    for template in rejected:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    two_digit_group = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(two_digit_group, r'\118', 'xyz'),
                     'xz8')
    self.assertEqual(re.sub(two_digit_group, r'\11a', 'xyz'),
                     'xza')
168
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000169 def test_qualified_re_sub(self):
170 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
171 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000172
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000173 def test_bug_114660(self):
174 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
175 'hello there')
176
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    for pattern, expected in (('x*', '-a-b-d-'), ('x+', 'ab-d')):
        self.assertEqual(re.sub(pattern, '-', 'abxd'), expected)
181
Ezio Melottief317382012-11-03 20:31:12 +0200182 def test_symbolic_groups(self):
183 re.compile('(?P<a>x)(?P=a)(?(a)y)')
184 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
185 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
186 self.assertRaises(re.error, re.compile, '(?Px)')
187 self.assertRaises(re.error, re.compile, '(?P=)')
188 self.assertRaises(re.error, re.compile, '(?P=1)')
189 self.assertRaises(re.error, re.compile, '(?P=a)')
190 self.assertRaises(re.error, re.compile, '(?P=a1)')
191 self.assertRaises(re.error, re.compile, '(?P=a.)')
192 self.assertRaises(re.error, re.compile, '(?P<)')
193 self.assertRaises(re.error, re.compile, '(?P<>)')
194 self.assertRaises(re.error, re.compile, '(?P<1>)')
195 self.assertRaises(re.error, re.compile, '(?P<a.>)')
196 self.assertRaises(re.error, re.compile, '(?())')
197 self.assertRaises(re.error, re.compile, '(?(a))')
198 self.assertRaises(re.error, re.compile, '(?(1a))')
199 self.assertRaises(re.error, re.compile, '(?(a.))')
200
def test_symbolic_refs(self):
    # Malformed or unusable group references in a replacement template
    # must raise.  The templates are raw strings so that '\g' is not an
    # invalid escape sequence in a normal string literal; the values
    # passed to re.sub() are unchanged.
    for template in (r'\g<a', r'\g<', r'\g', r'\g<a a>', r'\g<>',
                     r'\g<1a1>'):
        self.assertRaises(re.error, re.sub, '(?P<a>x)', template, 'xx')
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\2', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000212
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000213 def test_re_subn(self):
214 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
215 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
216 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
217 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
218 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000219
def test_re_split(self):
    # split() on one subject with plain, capturing, non-capturing and
    # alternation patterns.
    subject = ":a:b::c"
    cases = (
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    )
    for pattern, expected in cases:
        self.assertEqual(re.split(pattern, subject), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000235
def test_qualified_re_split(self):
    # split() with the maxsplit argument set to 2.
    cases = (
        ((":", ":a:b::c", 2), ['', 'a', 'b::c']),
        ((':', 'a:b:c:d', 2), ['a', 'b', 'c:d']),
        (("(:)", ":a:b::c", 2), ['', ':', 'a', ':', 'b::c']),
        (("(:*)", ":a:b::c", 2), ['', ':', 'a', ':', 'b::c']),
    )
    for args, expected in cases:
        self.assertEqual(re.split(*args), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000243
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000244 def test_re_findall(self):
245 self.assertEqual(re.findall(":+", "abc"), [])
246 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
247 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
248 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
249 (":", ":"),
250 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000251
Skip Montanaro5ba00542003-04-25 16:00:14 +0000252 def test_bug_117612(self):
253 self.assertEqual(re.findall(r"(a|(b))", "aba"),
254 [("a", ""),("b", "b"),("a", "")])
255
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000256 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000257 self.assertEqual(re.match('a', 'a').groups(), ())
258 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
259 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
260 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
261 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000262
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000263 pat = re.compile('((a)|(b))(c)?')
264 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
265 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
266 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
267 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
268 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000269
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000270 # A single group
271 m = re.match('(a)', 'a')
272 self.assertEqual(m.group(0), 'a')
273 self.assertEqual(m.group(0), 'a')
274 self.assertEqual(m.group(1), 'a')
275 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000276
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000277 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
278 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
279 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
280 (None, 'b', None))
281 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000282
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000283 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
285 ('(', 'a'))
286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
287 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300288 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
289 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000290 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
291 ('a', 'b'))
292 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
293 (None, 'd'))
294 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
295 (None, 'd'))
296 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
297 ('a', ''))
298
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000299 # Tests for bug #1177831: exercise groups other than the first group
300 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
301 self.assertEqual(p.match('abc').groups(),
302 ('a', 'b', 'c'))
303 self.assertEqual(p.match('ad').groups(),
304 ('a', None, 'd'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300305 self.assertIsNone(p.match('abd'))
306 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000307
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000308
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000309 def test_re_groupref(self):
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
311 ('|', 'a'))
312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
313 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300314 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
315 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000316 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
317 ('a', 'a'))
318 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
319 (None, None))
320
321 def test_groupdict(self):
322 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
323 'first second').groupdict(),
324 {'first':'first', 'second':'second'})
325
326 def test_expand(self):
327 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
328 "first second")
329 .expand(r"\2 \1 \g<second> \g<first>"),
330 "second first second first")
331
332 def test_repeat_minmax(self):
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300333 self.assertIsNone(re.match("^(\w){1}$", "abc"))
334 self.assertIsNone(re.match("^(\w){1}?$", "abc"))
335 self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
336 self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000337
338 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
344 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
345 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
346
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300347 self.assertIsNone(re.match("^x{1}$", "xxx"))
348 self.assertIsNone(re.match("^x{1}?$", "xxx"))
349 self.assertIsNone(re.match("^x{1,2}$", "xxx"))
350 self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000351
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300352 self.assertTrue(re.match("^x{3}$", "xxx"))
353 self.assertTrue(re.match("^x{1,3}$", "xxx"))
354 self.assertTrue(re.match("^x{1,4}$", "xxx"))
355 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
356 self.assertTrue(re.match("^x{3}?$", "xxx"))
357 self.assertTrue(re.match("^x{1,3}?$", "xxx"))
358 self.assertTrue(re.match("^x{1,4}?$", "xxx"))
359 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000360
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300361 self.assertIsNone(re.match("^x{}$", "xxx"))
362 self.assertTrue(re.match("^x{}$", "x{}"))
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000363
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000364 def test_getattr(self):
365 self.assertEqual(re.match("(a)", "a").pos, 0)
366 self.assertEqual(re.match("(a)", "a").endpos, 1)
367 self.assertEqual(re.match("(a)", "a").string, "a")
368 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300369 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000370
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, with no flag, with
    # LOCALE and, when unicode is available, with UNICODE.
    boundary_cases = ((r"\b(b.)\b", "abcd abc bcd bx"),
                      (r"\B(b.)\B", "abc bcd bc abxd"))
    for pattern, text in boundary_cases:
        self.assertEqual(re.search(pattern, text).group(1), "bx")
    for pattern, text in boundary_cases:
        self.assertEqual(re.search(pattern, text, re.LOCALE).group(1), "bx")
    if have_unicode:
        for pattern, text in boundary_cases:
            self.assertEqual(re.search(pattern, text,
                                       re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
    # The same checks with u'' subjects.
    self.assertEqual(re.search(r"\b(b.)\b",
                               u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               u"abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
    self.assertIsNone(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M))
    classes = r"\d\D\w\W\s\S"
    self.assertEqual(re.search(classes, "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(classes, "1aa! a", re.LOCALE).group(0),
                     "1aa! a")
    if have_unicode:
        self.assertEqual(re.search(classes, "1aa! a", re.UNICODE).group(0),
                         "1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000402
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200403 def test_string_boundaries(self):
404 # See http://bugs.python.org/issue10713
405 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
406 "abc")
407 # There's a word boundary at the start of a string.
408 self.assertTrue(re.match(r"\b", "abc"))
409 # A non-empty string includes a non-boundary zero-length match.
410 self.assertTrue(re.search(r"\B", "abc"))
411 # There is no non-boundary match at the start of a string.
412 self.assertFalse(re.match(r"\B", "abc"))
413 # However, an empty string contains no word boundaries, and also no
414 # non-boundaries.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300415 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200416 # This one is questionable and different from the perlre behaviour,
417 # but describes current behavior.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300418 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200419 # A single word-character string has two boundaries, but no
420 # non-boundary gaps.
421 self.assertEqual(len(re.findall(r"\b", "a")), 2)
422 self.assertEqual(len(re.findall(r"\B", "a")), 0)
423 # If there are no words, there are no boundaries
424 self.assertEqual(len(re.findall(r"\b", " ")), 0)
425 self.assertEqual(len(re.findall(r"\b", " ")), 0)
426 # Can match around the whitespace.
427 self.assertEqual(len(re.findall(r"\B", " ")), 2)
428
@requires_unicode
def test_bigcharset(self):
    # Character classes containing code points above the 8-bit range.
    pair_class = u(r"([\u2222\u2223])")
    target = unichr(0x2222)
    self.assertEqual(re.match(pair_class, target).group(1), target)
    self.assertEqual(re.match(pair_class, target, re.UNICODE).group(1),
                     target)
    r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
    high = unichr(0xff01)
    self.assertEqual(re.match(r, high, re.UNICODE).group(), high)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000437
Antoine Pitroub83ea142012-11-20 22:30:42 +0100438 def test_big_codesize(self):
439 # Issue #1160
440 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300441 self.assertTrue(r.match('1000'))
442 self.assertTrue(r.match('9999'))
Antoine Pitroub83ea142012-11-20 22:30:42 +0100443
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000444 def test_anyall(self):
445 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
446 "a\nb")
447 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
448 "a\n\nb")
449
450 def test_non_consuming(self):
451 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
452 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
453 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
454 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
455 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
456 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
457 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
458
459 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
460 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
461 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
462 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
463
464 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000465 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
466 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000467 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
468 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
469 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
470 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
471 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
472 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
473 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
474 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
475
476 def test_category(self):
477 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
478
def test_getlower(self):
    # _sre.getlower() must map 'A' to 'a' under each flag setting.
    import _sre
    upper, lower = ord('A'), ord('a')
    self.assertEqual(_sre.getlower(upper, 0), lower)
    self.assertEqual(_sre.getlower(upper, re.LOCALE), lower)
    if have_unicode:
        self.assertEqual(_sre.getlower(upper, re.UNICODE), lower)

    for subject in ("ABC", u"ABC"):
        self.assertEqual(re.match("abc", subject, re.I).group(0), "ABC")
488
489 def test_not_literal(self):
490 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
491 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
492
493 def test_search_coverage(self):
494 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
495 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
496
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the matched
    # text and span are `match` and `span`.  When both are omitted the
    # pattern is expected to match the whole of `text`.  Supplying only
    # one of the two raises ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    m = matcher(pattern, text)
    self.assertTrue(m)
    self.assertEqual(m.group(), match)
    self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000510
@requires_unicode
def test_re_escape(self):
    # re.escape() on each of the first 256 unicode characters: ASCII
    # letters and digits are expected unchanged, NUL as '\\000', and
    # anything else prefixed with a backslash.
    alnum_chars = unicode(string.ascii_letters + string.digits)
    p = u''.join(unichr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            expected = c
        elif c == u'\x00':
            expected = u'\\000'
        else:
            expected = u'\\' + c
        self.assertEqual(re.escape(c), expected)
        self.assertMatch(re.escape(c), c)
    self.assertMatch(re.escape(p), p)
524
def test_re_escape_byte(self):
    # Byte-string counterpart of the escape test: ASCII letters and
    # digits are expected unchanged, NUL as '\\000', and anything else
    # prefixed with a backslash.
    alnum_chars = string.ascii_letters + string.digits
    p = ''.join(chr(i) for i in range(256))
    for b in p:
        if b in alnum_chars:
            expected = b
        elif b == b'\x00':
            expected = b'\\000'
        else:
            expected = b'\\' + b
        self.assertEqual(re.escape(b), expected)
        self.assertMatch(re.escape(b), b)
    self.assertMatch(re.escape(p), p)
537
@requires_unicode
def test_re_escape_non_ascii(self):
    # Escaping a unicode string that contains non-ASCII characters.
    s = u(r'xxx\u2620\u2620\u2620xxx')
    s_escaped = re.escape(s)
    expected = u(r'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertEqual(s_escaped, expected)
    self.assertMatch(s_escaped, s)
    repeated = u'.%s+.' % re.escape(unichr(0x2620))
    self.assertMatch(repeated, s,
                     u(r'x\u2620\u2620\u2620x'), (2, 7), re.search)
Ezio Melotti46645632011-03-25 14:50:52 +0200546
def test_re_escape_non_ascii_bytes(self):
    # Escaping a byte string that contains bytes above 0x7f.
    raw = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
    escaped = re.escape(raw)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, raw)
    # The escaped three-byte sequence must be found twice in the subject.
    self.assertEqual(len(re.findall(re.escape(b'\xe2\x98\xa0'), raw)), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000554
def test_pickling(self):
    # Run the pickle round-trip check with pickle and then with cPickle.
    # Each module is imported just before it is used, as in the original
    # statement order.
    import pickle
    self.pickle_test(pickle)
    import cPickle
    self.pickle_test(cPickle)
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000563
def pickle_test(self, pickle):
    # Dump a compiled pattern with the given pickle module, load it
    # back, and check that the result equals the original pattern.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    newpat = pickle.loads(pickle.dumps(oldpat))
    self.assertEqual(oldpat, newpat)
Guido van Rossum23b22571997-07-17 22:36:14 +0000569
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000570 def test_constants(self):
571 self.assertEqual(re.I, re.IGNORECASE)
572 self.assertEqual(re.L, re.LOCALE)
573 self.assertEqual(re.M, re.MULTILINE)
574 self.assertEqual(re.S, re.DOTALL)
575 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000576
def test_flags(self):
    # Compiling with each of these flags must give a truthy pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertTrue(compiled)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000580
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000581 def test_sre_character_literals(self):
582 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300583 self.assertTrue(re.match(r"\%03o" % i, chr(i)))
584 self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
585 self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
586 self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
587 self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
588 self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000589 self.assertRaises(re.error, re.match, "\911", "")
590
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000591 def test_sre_character_class_literals(self):
592 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300593 self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
594 self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
595 self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
596 self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
597 self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
598 self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000599 self.assertRaises(re.error, re.match, "[\911]", "")
600
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000601 def test_bug_113254(self):
602 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
603 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
604 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
605
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000606 def test_bug_527371(self):
607 # bug described in patches 527371/672491
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300608 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000609 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
610 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
611 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
612 self.assertEqual(re.match("((a))", "a").lastindex, 1)
613
614 def test_bug_545855(self):
615 # bug 545855 -- This pattern failed to cause a compile error as it
616 # should, instead provoking a TypeError.
617 self.assertRaises(re.error, re.compile, 'foo[a-')
618
619 def test_bug_418626(self):
620 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
621 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
622 # pattern '*?' on a long string.
623 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
624 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
625 20003)
626 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000627 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000628 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000629 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000630
@requires_unicode
def test_bug_612074(self):
    # A character class built around an escaped non-ASCII character
    # (code point 0x2039) must compile to a truthy pattern object.
    escaped = re.escape(unichr(0x2039))
    pat = u"[" + escaped + u"]"
    compiled = re.compile(pat)
    self.assertEqual(compiled and 1, 1)
635
Skip Montanaro1e703c62003-04-25 15:40:28 +0000636 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000637 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000638 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000639 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
640 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
641 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000642
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200643 def test_unlimited_zero_width_repeat(self):
644 # Issue #9669
645 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
646 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
647 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
648 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
649 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
650 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
651
Skip Montanaro1e703c62003-04-25 15:40:28 +0000652 def test_scanner(self):
653 def s_ident(scanner, token): return token
654 def s_operator(scanner, token): return "op%s" % token
655 def s_float(scanner, token): return float(token)
656 def s_int(scanner, token): return int(token)
657
658 scanner = Scanner([
659 (r"[a-zA-Z_]\w*", s_ident),
660 (r"\d+\.\d*", s_float),
661 (r"\d+", s_int),
662 (r"=|\+|-|\*|/", s_operator),
663 (r"\s+", None),
664 ])
665
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300666 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000667
Skip Montanaro1e703c62003-04-25 15:40:28 +0000668 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
669 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
670 'op+', 'bar'], ''))
671
Skip Montanaro5ba00542003-04-25 16:00:14 +0000672 def test_bug_448951(self):
673 # bug 448951 (similar to 429357, but with single char match)
674 # (Also test greedy matches.)
675 for op in '','?','*':
676 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
677 (None, None))
678 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
679 ('a:', 'a'))
680
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000681 def test_bug_725106(self):
682 # capturing groups in alternatives in repeats
683 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
684 ('b', 'a'))
685 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
686 ('c', 'b'))
687 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
688 ('b', None))
689 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
690 ('b', None))
691 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
692 ('b', 'a'))
693 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
694 ('c', 'b'))
695 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
696 ('b', None))
697 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
698 ('b', None))
699
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000700 def test_bug_725149(self):
701 # mark_stack_base restoring before restoring marks
702 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
703 ('a', None))
704 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
705 ('a', None, None))
706
@requires_unicode
def test_bug_764548(self):
    # bug 764548, re.compile() barfs on str/unicode subclasses
    class UnicodeSubclass(unicode):
        pass

    compiled = re.compile(UnicodeSubclass("abc"))
    self.assertIsNone(compiled.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +0000713
Skip Montanaro5ba00542003-04-25 16:00:14 +0000714 def test_finditer(self):
715 iter = re.finditer(r":+", "a:b::c:::d")
716 self.assertEqual([item.group(0) for item in iter],
717 [":", "::", ":::"])
718
@requires_unicode
def test_bug_926075(self):
    # Compiling the same text as a str and as a unicode pattern must
    # yield two distinct pattern objects.
    from_str = re.compile('bug_926075')
    from_unicode = re.compile(u'bug_926075')
    self.assertIsNot(from_str, from_unicode)
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000723
@requires_unicode
def test_bug_931848(self):
    # Splitting on a class that lists U+002E together with three
    # non-ASCII code points must still split on the plain '.'.
    pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
    pieces = re.compile(pattern).split("a.b.c")
    self.assertEqual(pieces, ['a', 'b', 'c'])
729
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000730 def test_bug_581080(self):
731 iter = re.finditer(r"\s", "a b")
732 self.assertEqual(iter.next().span(), (1,2))
733 self.assertRaises(StopIteration, iter.next)
734
735 scanner = re.compile(r"\s").scanner("a b")
736 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300737 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000738
739 def test_bug_817234(self):
740 iter = re.finditer(r".*", "asdf")
741 self.assertEqual(iter.next().span(), (0, 4))
742 self.assertEqual(iter.next().span(), (4, 4))
743 self.assertRaises(StopIteration, iter.next)
744
@requires_unicode
def test_bug_6561(self):
    # '\d' should match characters in Unicode category 'Nd'
    # (Number, Decimal Digit), but not those in 'Nl' (Number,
    # Letter) or 'No' (Number, Other).
    digit_pattern = r'^\d$'

    decimal_digits = [
        unichr(0x0037),  # '\N{DIGIT SEVEN}', category 'Nd'
        unichr(0x0e58),  # '\N{THAI DIGIT SIX}', category 'Nd'
        unichr(0xff10),  # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
    ]
    for char in decimal_digits:
        match = re.match(digit_pattern, char, re.UNICODE)
        self.assertEqual(match.group(0), char)

    not_decimal_digits = [
        unichr(0x2165),  # '\N{ROMAN NUMERAL SIX}', category 'Nl'
        unichr(0x3039),  # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
        unichr(0x2082),  # '\N{SUBSCRIPT TWO}', category 'No'
        unichr(0x32b4),  # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
    ]
    for char in not_decimal_digits:
        self.assertIsNone(re.match(digit_pattern, char, re.UNICODE))
766
def test_empty_array(self):
    # SF bug 1647541
    # Matching against an empty array of any typecode must behave like
    # matching against an empty string.
    import array
    typecodes = 'cbBhHiIlLfd' + ('u' if have_unicode else '')
    nonempty_pattern = "bla"
    empty_pattern = ""
    for typecode in typecodes:
        empty = array.array(typecode)
        self.assertIsNone(re.compile(nonempty_pattern).match(empty))
        match = re.compile(empty_pattern).match(empty)
        self.assertEqual(match.groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000777
@requires_unicode
def test_inline_flags(self):
    # Bug #1700
    upper_char = unichr(0x1ea0)  # Latin Capital Letter A with Dot Below
    lower_char = unichr(0x1ea1)  # Latin Small Letter A with Dot Below

    # Each letter, used as the pattern, must match the other-case letter.
    directions = ((upper_char, lower_char), (lower_char, upper_char))
    # Three ways of requesting IGNORECASE + UNICODE: both as compile
    # flags, (?i) inline plus the U flag, and both inline as (?iu).
    variants = (
        ('', re.I | re.U),
        ('(?i)', re.U),
        ('(?iu)', 0),
    )
    for inline_prefix, flags in variants:
        for pattern_char, subject_char in directions:
            compiled = re.compile(inline_prefix + pattern_char, flags)
            self.assertTrue(compiled.match(subject_char))
Guido van Rossumae04c332008-01-03 19:12:44 +0000807
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000808 def test_dollar_matches_twice(self):
809 "$ matches the end of string, and just before the terminating \n"
810 pattern = re.compile('$')
811 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
812 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
813 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
814
815 pattern = re.compile('$', re.MULTILINE)
816 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
817 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
818 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
819
def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both.  For smaller values
    # a RuntimeError is raised instead of OverflowError.
    long_overflow = 2**128
    with self.assertRaises(TypeError):
        re.finditer("a", {})
    with self.assertRaises(OverflowError):
        _sre.compile("abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000830
def test_compile(self):
    # Test return value when given string and pattern as parameter
    from_string = re.compile('random pattern')
    self.assertIsInstance(from_string, re._pattern_type)
    # Compiling an already compiled pattern must hand back that object.
    from_pattern = re.compile(from_string)
    self.assertIsInstance(from_pattern, re._pattern_type)
    self.assertIs(from_pattern, from_string)
    # Test behaviour when not given a string or pattern as parameter
    with self.assertRaises(TypeError):
        re.compile(0)
840
def test_bug_13899(self):
    # Issue #13899: re pattern r"[\A]" should work like "A" but matches
    # nothing. Ditto B and Z.
    found = re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ')
    self.assertEqual(found, ['A', 'B', '\b', 'C', 'Z'])
846
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # '$' must be found at offset `size` of a string of that length.
    subject = 'a' * size
    match = re.search('$', subject)
    self.assertIsNotNone(match)
    self.assertEqual(match.start(), size)
    self.assertEqual(match.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100855
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # Replacing the empty pattern with the empty string must leave the
    # text unchanged and count size + 1 substitutions.
    subject = 'a' * size
    replaced, count = re.subn('', '', subject)
    self.assertEqual(replaced, subject)
    self.assertEqual(count, size + 1)
865
866
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200867 def test_repeat_minmax_overflow(self):
868 # Issue #13169
869 string = "x" * 100000
870 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
871 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
872 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
873 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
874 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
875 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
876 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
877 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
878 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
879 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
880 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
881
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # Counts of MAXREPEAT - 1 must still compile; MAXREPEAT itself must
    # be rejected with OverflowError.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    subject = "x" * 100000
    largest = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % largest, subject))
    at_most = re.match(r".{,%d}" % largest, subject)
    self.assertEqual(at_most.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest, subject))
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        with self.assertRaises(OverflowError):
            re.compile(template % MAXREPEAT)
896
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The offending group name must show up in the error message.
    bad_pattern = '(?P=<foo>)'
    with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
        re.compile(bad_pattern)
901
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The offending group name must show up in the error message.
    bad_pattern = '(?P<?foo>)'
    with self.assertRaisesRegexp(sre_constants.error, r'\?foo'):
        re.compile(bad_pattern)
906
def test_issue17998(self):
    # '.' followed by each quantifier (greedy and lazy) and then 'yz'
    # must find 'xyz' as a whole, for str and, if available, unicode.
    for reps in ('*', '+', '?', '{1}'):
        for mod in ('', '?'):
            pattern = '.' + reps + mod + 'yz'
            found = re.compile(pattern, re.S).findall('xyz')
            self.assertEqual(found, ['xyz'], msg=pattern)
            if have_unicode:
                pattern = unicode(pattern)
                found = re.compile(pattern, re.S).findall(u'xyz')
                self.assertEqual(found, [u'xyz'], msg=pattern)
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300917
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200918
Serhiy Storchaka83737c62013-08-19 23:20:07 +0300919 def test_bug_2537(self):
920 # issue 2537: empty submatches
921 for outer_op in ('{0,}', '*', '+', '{1,187}'):
922 for inner_op in ('{0,}', '*', '?'):
923 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
924 m = r.match("xyyzy")
925 self.assertEqual(m.group(0), "xyy")
926 self.assertEqual(m.group(1), "")
927 self.assertEqual(m.group(2), "y")
928
def test_debug_flag(self):
    expected = ['literal 102', 'literal 111', 'literal 111']
    # Compile twice: debug output is output again even a second time
    # (bypassing the cache -- issue #20426).
    for _ in range(2):
        with captured_stdout() as out:
            re.compile('foo', re.DEBUG)
        self.assertEqual(out.getvalue().splitlines(), expected)
940
Serhiy Storchakae50fe4c2014-03-06 12:24:29 +0200941 def test_keyword_parameters(self):
942 # Issue #20283: Accepting the string keyword parameter.
943 pat = re.compile(r'(ab)')
944 self.assertEqual(
945 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
946 self.assertEqual(
947 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
948 self.assertEqual(
949 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
950 self.assertEqual(
951 pat.split(string='abracadabra', maxsplit=1),
952 ['', 'ab', 'racadabra'])
953
Antoine Pitrouf5814112014-02-03 20:59:59 +0100954
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000955def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000956 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000957 if verbose:
958 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000959 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000960 # To save time, only run the first and last 10 tests
961 #tests = tests[:10] + tests[-10:]
962 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000963
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000964 for t in tests:
965 sys.stdout.flush()
966 pattern = s = outcome = repl = expected = None
967 if len(t) == 5:
968 pattern, s, outcome, repl, expected = t
969 elif len(t) == 3:
970 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000971 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000972 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
973
Guido van Rossum41360a41998-03-26 19:42:58 +0000974 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000975 obj = re.compile(pattern)
976 except re.error:
977 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000978 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000979 print '=== Syntax error:', t
980 except KeyboardInterrupt: raise KeyboardInterrupt
981 except:
982 print '*** Unexpected error ***', t
983 if verbose:
984 traceback.print_exc(file=sys.stdout)
985 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000986 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000987 result = obj.search(s)
988 except re.error, msg:
989 print '=== Unexpected exception', t, repr(msg)
990 if outcome == SYNTAX_ERROR:
991 # This should have been a syntax error; forget it.
992 pass
993 elif outcome == FAIL:
994 if result is None: pass # No match, as expected
995 else: print '=== Succeeded incorrectly', t
996 elif outcome == SUCCEED:
997 if result is not None:
998 # Matched, as expected, so now we compute the
999 # result string and compare it to our expected result.
1000 start, end = result.span(0)
1001 vardict={'found': result.group(0),
1002 'groups': result.group(),
1003 'flags': result.re.flags}
1004 for i in range(1, 100):
1005 try:
1006 gi = result.group(i)
1007 # Special hack because else the string concat fails:
1008 if gi is None:
1009 gi = "None"
1010 except IndexError:
1011 gi = "Error"
1012 vardict['g%d' % i] = gi
1013 for i in result.re.groupindex.keys():
1014 try:
1015 gi = result.group(i)
1016 if gi is None:
1017 gi = "None"
1018 except IndexError:
1019 gi = "Error"
1020 vardict[i] = gi
1021 repl = eval(repl, vardict)
1022 if repl != expected:
1023 print '=== grouping error', t,
1024 print repr(repl) + ' should be ' + repr(expected)
1025 else:
1026 print '=== Failed incorrectly', t
1027
1028 # Try the match on a unicode string, and check that it
1029 # still succeeds.
1030 try:
1031 result = obj.search(unicode(s, "latin-1"))
1032 if result is None:
1033 print '=== Fails on unicode match', t
1034 except NameError:
1035 continue # 1.5.2
1036 except TypeError:
1037 continue # unicode test case
1038
1039 # Try the match on a unicode pattern, and check that it
1040 # still succeeds.
1041 obj=re.compile(unicode(pattern, "latin-1"))
1042 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +00001043 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001044 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001045
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001046 # Try the match with the search area limited to the extent
1047 # of the match and see if it still succeeds. \B will
1048 # break (because it won't match at the end or start of a
1049 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001050
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001051 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1052 and result is not None:
1053 obj = re.compile(pattern)
1054 result = obj.search(s, result.start(0), result.end(0) + 1)
1055 if result is None:
1056 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001057
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001058 # Try the match with IGNORECASE enabled, and check that it
1059 # still succeeds.
1060 obj = re.compile(pattern, re.IGNORECASE)
1061 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001062 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001063 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001064
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001065 # Try the match with LOCALE enabled, and check that it
1066 # still succeeds.
1067 obj = re.compile(pattern, re.LOCALE)
1068 result = obj.search(s)
1069 if result is None:
1070 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001071
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001072 # Try the match with UNICODE locale enabled, and check
1073 # that it still succeeds.
1074 obj = re.compile(pattern, re.UNICODE)
1075 result = obj.search(s)
1076 if result is None:
1077 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001078
def test_main():
    # Entry point: run the unittest-based ReTests cases first, then the
    # table-driven checks in run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001082
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()