blob: 7bdf3534ae2733ff3c1ed441bd0ffb59ef431b81 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02002from test.test_support import precisionbigmemtest, _2G, cpython_only
Serhiy Storchaka7644ff12014-09-14 17:40:44 +03003from test.test_support import captured_stdout, have_unicode, requires_unicode, u
Guido van Rossum8e0ce301997-07-11 19:34:44 +00004import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00005from re import Scanner
R David Murray60773392013-04-14 13:08:50 -04006import sre_constants
Ezio Melotti46645632011-03-25 14:50:52 +02007import sys
8import string
9import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +000010from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +000011
Antoine Pitrou735f36e2012-12-03 20:53:12 +010012
Guido van Rossum23b22571997-07-17 22:36:14 +000013# Misc tests from Tim Peters' re.doc
14
Just van Rossum6802c6e2003-07-02 14:36:59 +000015# WARNING: Don't change details in these tests if you don't know
Ezio Melotti24b07bc2011-03-15 18:55:01 +020016# what you're doing. Some of these tests were carefully modeled to
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +000017# cover most of the code.
18
Skip Montanaro8ed06da2003-04-24 19:43:18 +000019import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000020
Skip Montanaro8ed06da2003-04-24 19:43:18 +000021class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000022
23 def test_weakref(self):
24 s = 'QabbbcR'
25 x = re.compile('ab+c')
26 y = proxy(x)
27 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
28
Skip Montanaro8ed06da2003-04-24 19:43:18 +000029 def test_search_star_plus(self):
30 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
31 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
32 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
33 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030034 self.assertIsNone(re.search('x', 'aaa'))
Skip Montanaro8ed06da2003-04-24 19:43:18 +000035 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
36 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
37 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
38 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +030039 self.assertIsNone(re.match('a+', 'xxx'))
Guido van Rossum8430c581998-04-03 21:47:12 +000040
def bump_num(self, matchobj):
    # Replacement callback: the whole match read as an int, plus one,
    # converted back to a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000044
def test_basic_re_sub(self):
    # Basic sub() behaviour: string and callable replacements, the count
    # argument, group references and escapes inside templates.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted as is; a template string has
    # its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Raw strings: '\g' is not a valid string-literal escape, so the
    # non-raw spelling only worked by accident.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    # These two templates are deliberately "cooked" (non-raw) literals.
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000072
Skip Montanaro2726fcd2003-04-25 14:31:54 +000073 def test_bug_449964(self):
74 # fails for group followed by other escape
75 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
76 'xx\bxx\b')
77
78 def test_bug_449000(self):
79 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000080 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n')
82 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n')
84 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
85 'abc\ndef\n')
86 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
87 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000088
@requires_unicode
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    def check(result, expected, expected_type):
        self.assertEqual(result, expected)
        self.assertEqual(type(result), expected_type)

    for pattern in 'x', u'x':
        for repl in 'y', u'y':
            check(re.sub(pattern, repl, u''), u'', unicode)
            check(re.sub(pattern, repl, ''), '', str)
            check(re.sub(pattern, repl, unicode(pattern)), repl, unicode)
            check(re.sub(pattern, repl, str(pattern)), repl, type(repl))
114
Raymond Hettinger80016c92007-12-19 18:13:31 +0000115 def test_bug_1661(self):
116 # Verify that flags do not get silently ignored with compiled patterns
117 pattern = re.compile('.')
118 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
119 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
120 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
121 self.assertRaises(ValueError, re.compile, pattern, re.I)
122
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000123 def test_bug_3629(self):
124 # A regex that triggered a bug in the sre-code validator
125 re.compile("(?P<quote>)(?(quote))")
126
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # (template, expected result) for numeric escapes that sub() accepts
    # when the pattern has no groups.
    expansions = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),

        (r'\400', '\0'),
        (r'\777', '\377'),
    ]
    for template, expected in expansions:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Templates that must be rejected with re.error.
    rejected = [
        r'\1',
        r'\8',
        r'\9',
        r'\11',
        r'\18',
        r'\1a',
        r'\90',
        r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in rejected:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    ten_groups_then_one = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(ten_groups_then_one, r'\118', 'xyz'), 'xz8')
    self.assertEqual(re.sub(ten_groups_then_one, r'\11a', 'xyz'), 'xza')
168
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000169 def test_qualified_re_sub(self):
170 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
171 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000172
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000173 def test_bug_114660(self):
174 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
175 'hello there')
176
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    for pattern, expected in (('x*', '-a-b-d-'), ('x+', 'ab-d')):
        self.assertEqual(re.sub(pattern, '-', 'abxd'), expected)
181
Ezio Melottief317382012-11-03 20:31:12 +0200182 def test_symbolic_groups(self):
183 re.compile('(?P<a>x)(?P=a)(?(a)y)')
184 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
185 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
186 self.assertRaises(re.error, re.compile, '(?Px)')
187 self.assertRaises(re.error, re.compile, '(?P=)')
188 self.assertRaises(re.error, re.compile, '(?P=1)')
189 self.assertRaises(re.error, re.compile, '(?P=a)')
190 self.assertRaises(re.error, re.compile, '(?P=a1)')
191 self.assertRaises(re.error, re.compile, '(?P=a.)')
192 self.assertRaises(re.error, re.compile, '(?P<)')
193 self.assertRaises(re.error, re.compile, '(?P<>)')
194 self.assertRaises(re.error, re.compile, '(?P<1>)')
195 self.assertRaises(re.error, re.compile, '(?P<a.>)')
196 self.assertRaises(re.error, re.compile, '(?())')
197 self.assertRaises(re.error, re.compile, '(?(a))')
198 self.assertRaises(re.error, re.compile, '(?(1a))')
199 self.assertRaises(re.error, re.compile, '(?(a.))')
200
def test_symbolic_refs(self):
    # Malformed or unusable group references in a sub() template must
    # raise.  Templates are raw strings: '\g' is not a valid
    # string-literal escape, so the non-raw spelling only worked by
    # accident.
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a a>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<1a1>', 'xx')
    # A well-formed but unknown group name is an IndexError, not re.error.
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\2', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000212
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000213 def test_re_subn(self):
214 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
215 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
216 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
217 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
218 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000219
def test_re_split(self):
    # split() of one subject with plain, capturing, non-capturing and
    # alternated separators; each entry is (separator, expected parts).
    subject = ":a:b::c"
    expectations = [
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]
    for separator, parts in expectations:
        self.assertEqual(re.split(separator, subject), parts)
Guido van Rossum49946571997-07-18 04:26:25 +0000235
def test_qualified_re_split(self):
    # split() with maxsplit=2: the unsplit remainder is the last item.
    colon_text = ":a:b::c"
    self.assertEqual(re.split(":", colon_text, 2), ['', 'a', 'b::c'])
    self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
    with_separators = ['', ':', 'a', ':', 'b::c']
    for separator in ("(:)", "(:*)"):
        self.assertEqual(re.split(separator, colon_text, 2),
                         with_separators)
Guido van Rossum49946571997-07-18 04:26:25 +0000243
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000244 def test_re_findall(self):
245 self.assertEqual(re.findall(":+", "abc"), [])
246 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
247 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
248 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
249 (":", ":"),
250 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000251
Skip Montanaro5ba00542003-04-25 16:00:14 +0000252 def test_bug_117612(self):
253 self.assertEqual(re.findall(r"(a|(b))", "aba"),
254 [("a", ""),("b", "b"),("a", "")])
255
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000256 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000257 self.assertEqual(re.match('a', 'a').groups(), ())
258 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
259 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
260 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
261 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000262
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000263 pat = re.compile('((a)|(b))(c)?')
264 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
265 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
266 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
267 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
268 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000269
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000270 # A single group
271 m = re.match('(a)', 'a')
272 self.assertEqual(m.group(0), 'a')
273 self.assertEqual(m.group(0), 'a')
274 self.assertEqual(m.group(1), 'a')
275 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000276
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000277 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
278 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
279 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
280 (None, 'b', None))
281 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000282
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000283 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
285 ('(', 'a'))
286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
287 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300288 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
289 self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000290 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
291 ('a', 'b'))
292 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
293 (None, 'd'))
294 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
295 (None, 'd'))
296 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
297 ('a', ''))
298
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000299 # Tests for bug #1177831: exercise groups other than the first group
300 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
301 self.assertEqual(p.match('abc').groups(),
302 ('a', 'b', 'c'))
303 self.assertEqual(p.match('ad').groups(),
304 ('a', None, 'd'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300305 self.assertIsNone(p.match('abd'))
306 self.assertIsNone(p.match('ac'))
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000307
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000308
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000309 def test_re_groupref(self):
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
311 ('|', 'a'))
312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
313 (None, 'a'))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300314 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
315 self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000316 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
317 ('a', 'a'))
318 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
319 (None, None))
320
321 def test_groupdict(self):
322 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
323 'first second').groupdict(),
324 {'first':'first', 'second':'second'})
325
326 def test_expand(self):
327 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
328 "first second")
329 .expand(r"\2 \1 \g<second> \g<first>"),
330 "second first second first")
331
332 def test_repeat_minmax(self):
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300333 self.assertIsNone(re.match("^(\w){1}$", "abc"))
334 self.assertIsNone(re.match("^(\w){1}?$", "abc"))
335 self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
336 self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000337
338 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
344 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
345 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
346
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300347 self.assertIsNone(re.match("^x{1}$", "xxx"))
348 self.assertIsNone(re.match("^x{1}?$", "xxx"))
349 self.assertIsNone(re.match("^x{1,2}$", "xxx"))
350 self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000351
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300352 self.assertTrue(re.match("^x{3}$", "xxx"))
353 self.assertTrue(re.match("^x{1,3}$", "xxx"))
354 self.assertTrue(re.match("^x{1,4}$", "xxx"))
355 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
356 self.assertTrue(re.match("^x{3}?$", "xxx"))
357 self.assertTrue(re.match("^x{1,3}?$", "xxx"))
358 self.assertTrue(re.match("^x{1,4}?$", "xxx"))
359 self.assertTrue(re.match("^x{3,4}?$", "xxx"))
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000360
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300361 self.assertIsNone(re.match("^x{}$", "xxx"))
362 self.assertTrue(re.match("^x{}$", "x{}"))
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000363
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000364 def test_getattr(self):
365 self.assertEqual(re.match("(a)", "a").pos, 0)
366 self.assertEqual(re.match("(a)", "a").endpos, 1)
367 self.assertEqual(re.match("(a)", "a").string, "a")
368 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300369 self.assertTrue(re.match("(a)", "a").re)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000370
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, without flags and
    # under LOCALE (and UNICODE when unicode support is available).
    flag_variants = [0, re.LOCALE]
    if have_unicode:
        flag_variants.append(re.UNICODE)

    def check_anchors(wrapped, bare):
        # '^'/'$' honour MULTILINE, \A and \Z do not.
        self.assertEqual(re.search(r"^abc$", wrapped, re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", bare, re.M).group(0), "abc")
        self.assertIsNone(re.search(r"^\Aabc\Z$", wrapped, re.M))

    for flags in flag_variants:
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", flags).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", flags).group(1), "bx")
    check_anchors("\nabc\n", "abc")

    # The same checks on unicode subjects.
    self.assertEqual(re.search(r"\b(b.)\b",
                               u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               u"abc bcd bc abxd").group(1), "bx")
    check_anchors(u"\nabc\n", u"abc")

    for flags in flag_variants:
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", flags).group(0), "1aa! a")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000402
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200403 def test_string_boundaries(self):
404 # See http://bugs.python.org/issue10713
405 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
406 "abc")
407 # There's a word boundary at the start of a string.
408 self.assertTrue(re.match(r"\b", "abc"))
409 # A non-empty string includes a non-boundary zero-length match.
410 self.assertTrue(re.search(r"\B", "abc"))
411 # There is no non-boundary match at the start of a string.
412 self.assertFalse(re.match(r"\B", "abc"))
413 # However, an empty string contains no word boundaries, and also no
414 # non-boundaries.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300415 self.assertIsNone(re.search(r"\B", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200416 # This one is questionable and different from the perlre behaviour,
417 # but describes current behavior.
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300418 self.assertIsNone(re.search(r"\b", ""))
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200419 # A single word-character string has two boundaries, but no
420 # non-boundary gaps.
421 self.assertEqual(len(re.findall(r"\b", "a")), 2)
422 self.assertEqual(len(re.findall(r"\B", "a")), 0)
423 # If there are no words, there are no boundaries
424 self.assertEqual(len(re.findall(r"\b", " ")), 0)
425 self.assertEqual(len(re.findall(r"\b", " ")), 0)
426 # Can match around the whitespace.
427 self.assertEqual(len(re.findall(r"\B", " ")), 2)
428
@requires_unicode
def test_bigcharset(self):
    # Character classes containing code points well above the 8-bit range.
    two_char_class = u(r"([\u2222\u2223])")
    target = unichr(0x2222)
    self.assertEqual(re.match(two_char_class, target).group(1), target)
    self.assertEqual(re.match(two_char_class, target, re.UNICODE).group(1),
                     target)
    # A class built from every 255th code point from 256 up to 2**16.
    sparse_class = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
    probe = unichr(0xff01)
    self.assertEqual(re.match(sparse_class, probe, re.UNICODE).group(),
                     probe)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000437
Antoine Pitroub83ea142012-11-20 22:30:42 +0100438 def test_big_codesize(self):
439 # Issue #1160
440 r = re.compile('|'.join(('%d'%x for x in range(10000))))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300441 self.assertTrue(r.match('1000'))
442 self.assertTrue(r.match('9999'))
Antoine Pitroub83ea142012-11-20 22:30:42 +0100443
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000444 def test_anyall(self):
445 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
446 "a\nb")
447 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
448 "a\n\nb")
449
450 def test_non_consuming(self):
451 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
452 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
453 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
454 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
455 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
456 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
457 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
458
459 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
460 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
461 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
462 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
463
464 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000465 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
466 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000467 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
468 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
469 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
470 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
471 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
472 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
473 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
474 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
475
476 def test_category(self):
477 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
478
def test_getlower(self):
    # _sre.getlower() maps 'A' to 'a' under each flag setting.
    import _sre
    upper, lower = ord('A'), ord('a')
    self.assertEqual(_sre.getlower(upper, 0), lower)
    self.assertEqual(_sre.getlower(upper, re.LOCALE), lower)
    if have_unicode:
        self.assertEqual(_sre.getlower(upper, re.UNICODE), lower)

    for subject in ("ABC", u"ABC"):
        self.assertEqual(re.match("abc", subject, re.I).group(0), "ABC")
488
489 def test_not_literal(self):
490 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
491 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
492
493 def test_search_coverage(self):
494 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
495 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
496
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the matched
    # text and span equal `match` and `span`.  With neither given, the
    # pattern is expected to match the whole of `text`; giving only one
    # of the two raises ValueError.
    omitted = [arg is None for arg in (match, span)]
    if all(omitted):
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    elif any(omitted):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    result = matcher(pattern, text)
    self.assertTrue(result)
    self.assertEqual(result.group(), match)
    self.assertEqual(result.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000510
@requires_unicode
def test_re_escape(self):
    # escape() on every code point below 256: alphanumerics are left
    # alone, NUL becomes \000, anything else gets a backslash prefix.
    alnum_chars = unicode(string.ascii_letters + string.digits)
    every_char = u''.join(unichr(i) for i in range(256))
    for ch in every_char:
        if ch in alnum_chars:
            expected = ch
        elif ch == u'\x00':
            expected = u'\\000'
        else:
            expected = u'\\' + ch
        self.assertEqual(re.escape(ch), expected)
        self.assertMatch(re.escape(ch), ch)
    self.assertMatch(re.escape(every_char), every_char)
523 self.assertMatch(re.escape(p), p)
524
def test_re_escape_byte(self):
    # Byte-string counterpart of the escape() check: alphanumerics are
    # left alone, NUL becomes \000, anything else gets a backslash prefix.
    alnum_chars = string.ascii_letters + string.digits
    every_byte = ''.join(chr(i) for i in range(256))
    for byte in every_byte:
        if byte in alnum_chars:
            expected = byte
        elif byte == b'\x00':
            expected = b'\\000'
        else:
            expected = b'\\' + byte
        self.assertEqual(re.escape(byte), expected)
        self.assertMatch(re.escape(byte), byte)
    self.assertMatch(re.escape(every_byte), every_byte)
537
@requires_unicode
def test_re_escape_non_ascii(self):
    # escape() puts a backslash before each non-ASCII character, and the
    # escaped text still matches the original.
    text = u(r'xxx\u2620\u2620\u2620xxx')
    escaped = re.escape(text)
    self.assertEqual(escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx'))
    self.assertMatch(escaped, text)
    # One or more U+2620 with one arbitrary character on each side.
    run_pattern = u'.%s+.' % re.escape(unichr(0x2620))
    self.assertMatch(run_pattern, text,
                     u(r'x\u2620\u2620\u2620x'), (2, 7), re.search)
Ezio Melotti46645632011-03-25 14:50:52 +0200546
def test_re_escape_non_ascii_bytes(self):
    # escape() on a byte string holding non-ASCII bytes: each such byte
    # gets its own backslash.
    data = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
    escaped = re.escape(data)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, data)
    hits = re.findall(re.escape(b'\xe2\x98\xa0'), data)
    self.assertEqual(len(hits), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000554
def test_pickling(self):
    # Run the round-trip check with both pickle implementations, in the
    # same order as before: pickle first, then cPickle.
    for module_name in ('pickle', 'cPickle'):
        self.pickle_test(__import__(module_name))
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    from sre import _compile
    # current pickle expects the _compile() reconstructor in re module
    from re import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000565
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle module with
    # every protocol from 0 to HIGHEST_PROTOCOL; the result must compare
    # equal to the original pattern.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    for protocol in range(0, pickle.HIGHEST_PROTOCOL + 1):
        restored = pickle.loads(pickle.dumps(original, protocol))
        self.assertEqual(restored, original)
Guido van Rossum23b22571997-07-17 22:36:14 +0000572
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000573 def test_constants(self):
574 self.assertEqual(re.I, re.IGNORECASE)
575 self.assertEqual(re.L, re.LOCALE)
576 self.assertEqual(re.M, re.MULTILINE)
577 self.assertEqual(re.S, re.DOTALL)
578 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000579
def test_flags(self):
    # compile() accepts each flag and returns a truthy pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertTrue(compiled)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000583
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000584 def test_sre_character_literals(self):
585 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300586 self.assertTrue(re.match(r"\%03o" % i, chr(i)))
587 self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
588 self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
589 self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
590 self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
591 self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000592 self.assertRaises(re.error, re.match, "\911", "")
593
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000594 def test_sre_character_class_literals(self):
595 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300596 self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
597 self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
598 self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
599 self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
600 self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
601 self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000602 self.assertRaises(re.error, re.match, "[\911]", "")
603
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000604 def test_bug_113254(self):
605 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
606 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
607 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
608
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000609 def test_bug_527371(self):
610 # bug described in patches 527371/672491
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300611 self.assertIsNone(re.match(r'(a)?a','a').lastindex)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000612 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
613 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
614 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
615 self.assertEqual(re.match("((a))", "a").lastindex, 1)
616
617 def test_bug_545855(self):
618 # bug 545855 -- This pattern failed to cause a compile error as it
619 # should, instead provoking a TypeError.
620 self.assertRaises(re.error, re.compile, 'foo[a-')
621
622 def test_bug_418626(self):
623 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
624 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
625 # pattern '*?' on a long string.
626 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
627 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
628 20003)
629 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000630 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000631 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000632 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000633
@requires_unicode
def test_bug_612074(self):
    # An escaped non-ASCII character must be usable inside a character class.
    escaped = re.escape(unichr(0x2039))
    pat = u"[" + escaped + u"]"
    compiled = re.compile(pat)
    self.assertEqual(compiled and 1, 1)
638
Skip Montanaro1e703c62003-04-25 15:40:28 +0000639 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000640 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000641 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000642 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
643 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
644 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000645
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +0200646 def test_unlimited_zero_width_repeat(self):
647 # Issue #9669
648 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
649 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
650 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
651 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
652 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
653 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
654
Skip Montanaro1e703c62003-04-25 15:40:28 +0000655 def test_scanner(self):
656 def s_ident(scanner, token): return token
657 def s_operator(scanner, token): return "op%s" % token
658 def s_float(scanner, token): return float(token)
659 def s_int(scanner, token): return int(token)
660
661 scanner = Scanner([
662 (r"[a-zA-Z_]\w*", s_ident),
663 (r"\d+\.\d*", s_float),
664 (r"\d+", s_int),
665 (r"=|\+|-|\*|/", s_operator),
666 (r"\s+", None),
667 ])
668
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300669 self.assertTrue(scanner.scanner.scanner("").pattern)
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000670
Skip Montanaro1e703c62003-04-25 15:40:28 +0000671 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
672 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
673 'op+', 'bar'], ''))
674
Skip Montanaro5ba00542003-04-25 16:00:14 +0000675 def test_bug_448951(self):
676 # bug 448951 (similar to 429357, but with single char match)
677 # (Also test greedy matches.)
678 for op in '','?','*':
679 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
680 (None, None))
681 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
682 ('a:', 'a'))
683
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000684 def test_bug_725106(self):
685 # capturing groups in alternatives in repeats
686 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
687 ('b', 'a'))
688 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
689 ('c', 'b'))
690 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
691 ('b', None))
692 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
693 ('b', None))
694 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
695 ('b', 'a'))
696 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
697 ('c', 'b'))
698 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
699 ('b', None))
700 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
701 ('b', None))
702
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000703 def test_bug_725149(self):
704 # mark_stack_base restoring before restoring marks
705 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
706 ('a', None))
707 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
708 ('a', None, None))
709
@requires_unicode
def test_bug_764548(self):
    # bug 764548, re.compile() barfs on str/unicode subclasses
    class my_unicode(unicode):
        pass

    subclass_pattern = my_unicode("abc")
    pat = re.compile(subclass_pattern)
    self.assertIsNone(pat.match("xyz"))
Just van Rossum12723ba2003-07-02 20:03:04 +0000716
Skip Montanaro5ba00542003-04-25 16:00:14 +0000717 def test_finditer(self):
718 iter = re.finditer(r":+", "a:b::c:::d")
719 self.assertEqual([item.group(0) for item in iter],
720 [":", "::", ":::"])
721
@requires_unicode
def test_bug_926075(self):
    # A byte-string pattern and the equal unicode pattern must not be
    # handed back as the same compiled object.
    from_str = re.compile('bug_926075')
    from_unicode = re.compile(u'bug_926075')
    self.assertIsNot(from_str, from_unicode)
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000726
@requires_unicode
def test_bug_931848(self):
    # A character class made only of \u escapes must split on U+002E ('.').
    pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
    pieces = re.compile(pattern).split("a.b.c")
    self.assertEqual(pieces, ['a', 'b', 'c'])
732
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000733 def test_bug_581080(self):
734 iter = re.finditer(r"\s", "a b")
735 self.assertEqual(iter.next().span(), (1,2))
736 self.assertRaises(StopIteration, iter.next)
737
738 scanner = re.compile(r"\s").scanner("a b")
739 self.assertEqual(scanner.search().span(), (1, 2))
Serhiy Storchakaed5ea152014-09-14 16:19:37 +0300740 self.assertIsNone(scanner.search())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000741
742 def test_bug_817234(self):
743 iter = re.finditer(r".*", "asdf")
744 self.assertEqual(iter.next().span(), (0, 4))
745 self.assertEqual(iter.next().span(), (4, 4))
746 self.assertRaises(StopIteration, iter.next)
747
@requires_unicode
def test_bug_6561(self):
    # '\d' should match characters in Unicode category 'Nd'
    # (Number, Decimal Digit), but not those in 'Nl' (Number,
    # Letter) or 'No' (Number, Other).
    single_digit = r'^\d$'

    decimal_digits = [
        unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
        unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
        unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
    ]
    for char in decimal_digits:
        matched = re.match(single_digit, char, re.UNICODE)
        self.assertEqual(matched.group(0), char)

    not_decimal_digits = [
        unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
        unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
        unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
        unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
    ]
    for char in not_decimal_digits:
        self.assertIsNone(re.match(single_digit, char, re.UNICODE))
769
def test_empty_array(self):
    # SF bug 1647541: matching against an empty array of any typecode.
    import array
    typecodes = 'cbBhHiIlLfd'
    if have_unicode:
        typecodes += 'u'
    for typecode in typecodes:
        empty = array.array(typecode)
        # A non-empty pattern cannot match an empty buffer ...
        self.assertIsNone(re.compile("bla").match(empty))
        # ... while the empty pattern matches it, with no groups.
        matched = re.compile("").match(empty)
        self.assertEqual(matched.groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000780
@requires_unicode
def test_inline_flags(self):
    # Bug #1700
    upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
    lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below

    # Each row is (arguments for re.compile, character to match): flags
    # passed as arguments, then '(?i)' inline with re.U passed, then both
    # flags inline.  Every combination must match case-insensitively.
    cases = [
        ((upper_char, re.I | re.U), lower_char),
        ((lower_char, re.I | re.U), upper_char),
        (('(?i)' + upper_char, re.U), lower_char),
        (('(?i)' + lower_char, re.U), upper_char),
        (('(?iu)' + upper_char,), lower_char),
        (('(?iu)' + lower_char,), upper_char),
    ]
    for compile_args, subject in cases:
        p = re.compile(*compile_args)
        q = p.match(subject)
        self.assertTrue(q)
Guido van Rossumae04c332008-01-03 19:12:44 +0000810
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000811 def test_dollar_matches_twice(self):
812 "$ matches the end of string, and just before the terminating \n"
813 pattern = re.compile('$')
814 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
815 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
816 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
817
818 pattern = re.compile('$', re.MULTILINE)
819 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
820 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
821 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
822
def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both.  For smaller values
    # a RuntimeError is raised instead of OverflowError.
    long_overflow = 2**128
    with self.assertRaises(TypeError):
        re.finditer("a", {})
    with self.assertRaises(OverflowError):
        _sre.compile("abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000833
def test_compile(self):
    pattern_type = re._pattern_type
    # Test return value when given string and pattern as parameter
    compiled = re.compile('random pattern')
    self.assertIsInstance(compiled, pattern_type)
    recompiled = re.compile(compiled)
    self.assertIsInstance(recompiled, pattern_type)
    # Compiling an already compiled pattern returns that very object.
    self.assertIs(recompiled, compiled)
    # Test behaviour when not given a string or pattern as parameter
    with self.assertRaises(TypeError):
        re.compile(0)
843
def test_bug_13899(self):
    # Issue #13899: re pattern r"[\A]" should work like "A" but matches
    # nothing. Ditto B and Z.
    found = re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ')
    self.assertEqual(found, ['A', 'B', '\b', 'C', 'Z'])
849
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    haystack = 'a' * size
    end_match = re.search('$', haystack)
    self.assertIsNotNone(end_match)
    # The empty match sits at the very end of the string.
    self.assertEqual(end_match.start(), size)
    self.assertEqual(end_match.end(), size)
Antoine Pitrou735f36e2012-12-03 20:53:12 +0100858
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    haystack = 'a' * size
    replaced, count = re.subn('', '', haystack)
    self.assertEqual(replaced, haystack)
    # One empty match before each character plus one at the end.
    self.assertEqual(count, size + 1)
868
869
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200870 def test_repeat_minmax_overflow(self):
871 # Issue #13169
872 string = "x" * 100000
873 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
874 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
875 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
876 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
877 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
878 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
879 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
880 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
881 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
882 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
883 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
884
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    string = "x" * 100000
    # MAXREPEAT - 1 still compiles; a minimum that large cannot be met by
    # this string, while a maximum that large lets '.' take all of it.
    largest = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % largest, string))
    upto_largest = re.match(r".{,%d}" % largest, string)
    self.assertEqual(upto_largest.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest, string))
    # MAXREPEAT itself is rejected at compile time.
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
899
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The error text must mention the offending back-reference name.
    expect_error = self.assertRaisesRegexp(sre_constants.error, '<foo>')
    with expect_error:
        re.compile('(?P=<foo>)')
904
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The error text must mention the offending group name.
    expect_error = self.assertRaisesRegexp(sre_constants.error, r'\?foo')
    with expect_error:
        re.compile('(?P<?foo>)')
909
def test_issue17998(self):
    # Every repeat operator on '.', greedy or not, followed by a literal
    # tail must still find the whole of 'xyz'.
    for quantifier in ('*', '+', '?', '{1}'):
        for laziness in ('', '?'):
            pattern = '.' + quantifier + laziness + 'yz'
            found = re.compile(pattern, re.S).findall('xyz')
            self.assertEqual(found, ['xyz'], msg=pattern)
            if have_unicode:
                # Same check with a unicode pattern and subject.
                pattern = unicode(pattern)
                found = re.compile(pattern, re.S).findall(u'xyz')
                self.assertEqual(found, [u'xyz'], msg=pattern)
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +0300920
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200921
Serhiy Storchaka83737c62013-08-19 23:20:07 +0300922 def test_bug_2537(self):
923 # issue 2537: empty submatches
924 for outer_op in ('{0,}', '*', '+', '{1,187}'):
925 for inner_op in ('{0,}', '*', '?'):
926 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
927 m = r.match("xyyzy")
928 self.assertEqual(m.group(0), "xyy")
929 self.assertEqual(m.group(1), "")
930 self.assertEqual(m.group(2), "y")
931
def test_debug_flag(self):
    pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
    # NOTE(review): the leading whitespace inside this literal is part of
    # the expected output (two spaces per nesting level is assumed here);
    # verify it against the parser's real DEBUG dump.
    dump = '''\
subpattern 1
  literal 46
subpattern None
  branch
    in
      literal 99
      literal 104
  or
    literal 112
    literal 121
subpattern None
  groupref_exists 1
    at at_end
  else
    literal 58
    literal 32
'''
    # Debug output is output again even a second time (bypassing
    # the cache -- issue #20426), so compile twice and compare each dump.
    for _attempt in range(2):
        with captured_stdout() as out:
            re.compile(pat, re.DEBUG)
        self.assertEqual(out.getvalue(), dump)
Antoine Pitrouf5814112014-02-03 20:59:59 +0100960
Serhiy Storchakae50fe4c2014-03-06 12:24:29 +0200961 def test_keyword_parameters(self):
962 # Issue #20283: Accepting the string keyword parameter.
963 pat = re.compile(r'(ab)')
964 self.assertEqual(
965 pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
966 self.assertEqual(
967 pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
968 self.assertEqual(
969 pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
970 self.assertEqual(
971 pat.split(string='abracadabra', maxsplit=1),
972 ['', 'ab', 'racadabra'])
973
Antoine Pitrouf5814112014-02-03 20:59:59 +0100974
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000975def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000976 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000977 if verbose:
978 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000979 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000980 # To save time, only run the first and last 10 tests
981 #tests = tests[:10] + tests[-10:]
982 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000983
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000984 for t in tests:
985 sys.stdout.flush()
986 pattern = s = outcome = repl = expected = None
987 if len(t) == 5:
988 pattern, s, outcome, repl, expected = t
989 elif len(t) == 3:
990 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000991 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000992 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
993
Guido van Rossum41360a41998-03-26 19:42:58 +0000994 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000995 obj = re.compile(pattern)
996 except re.error:
997 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000998 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000999 print '=== Syntax error:', t
1000 except KeyboardInterrupt: raise KeyboardInterrupt
1001 except:
1002 print '*** Unexpected error ***', t
1003 if verbose:
1004 traceback.print_exc(file=sys.stdout)
1005 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +00001006 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001007 result = obj.search(s)
1008 except re.error, msg:
1009 print '=== Unexpected exception', t, repr(msg)
1010 if outcome == SYNTAX_ERROR:
1011 # This should have been a syntax error; forget it.
1012 pass
1013 elif outcome == FAIL:
1014 if result is None: pass # No match, as expected
1015 else: print '=== Succeeded incorrectly', t
1016 elif outcome == SUCCEED:
1017 if result is not None:
1018 # Matched, as expected, so now we compute the
1019 # result string and compare it to our expected result.
1020 start, end = result.span(0)
1021 vardict={'found': result.group(0),
1022 'groups': result.group(),
1023 'flags': result.re.flags}
1024 for i in range(1, 100):
1025 try:
1026 gi = result.group(i)
1027 # Special hack because else the string concat fails:
1028 if gi is None:
1029 gi = "None"
1030 except IndexError:
1031 gi = "Error"
1032 vardict['g%d' % i] = gi
1033 for i in result.re.groupindex.keys():
1034 try:
1035 gi = result.group(i)
1036 if gi is None:
1037 gi = "None"
1038 except IndexError:
1039 gi = "Error"
1040 vardict[i] = gi
1041 repl = eval(repl, vardict)
1042 if repl != expected:
1043 print '=== grouping error', t,
1044 print repr(repl) + ' should be ' + repr(expected)
1045 else:
1046 print '=== Failed incorrectly', t
1047
1048 # Try the match on a unicode string, and check that it
1049 # still succeeds.
1050 try:
1051 result = obj.search(unicode(s, "latin-1"))
1052 if result is None:
1053 print '=== Fails on unicode match', t
1054 except NameError:
1055 continue # 1.5.2
1056 except TypeError:
1057 continue # unicode test case
1058
1059 # Try the match on a unicode pattern, and check that it
1060 # still succeeds.
1061 obj=re.compile(unicode(pattern, "latin-1"))
1062 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +00001063 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001064 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001065
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001066 # Try the match with the search area limited to the extent
1067 # of the match and see if it still succeeds. \B will
1068 # break (because it won't match at the end or start of a
1069 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001070
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001071 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1072 and result is not None:
1073 obj = re.compile(pattern)
1074 result = obj.search(s, result.start(0), result.end(0) + 1)
1075 if result is None:
1076 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +00001077
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001078 # Try the match with IGNORECASE enabled, and check that it
1079 # still succeeds.
1080 obj = re.compile(pattern, re.IGNORECASE)
1081 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +00001082 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001083 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001084
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001085 # Try the match with LOCALE enabled, and check that it
1086 # still succeeds.
1087 obj = re.compile(pattern, re.LOCALE)
1088 result = obj.search(s)
1089 if result is None:
1090 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +00001091
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001092 # Try the match with UNICODE locale enabled, and check
1093 # that it still succeeds.
1094 obj = re.compile(pattern, re.UNICODE)
1095 result = obj.search(s)
1096 if result is None:
1097 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +00001098
def test_main():
    # Run the unittest-based cases first, then the table-driven cases,
    # which only print discrepancies to stdout.
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +00001102
# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_main()