blob: befe0e8b98e518398f0be173c2b1636c64d19617 [file] [log] [blame]
Florent Xicluna6257a7b2010-03-31 22:01:03 +00001from test.test_support import verbose, run_unittest, import_module
Antoine Pitroub83575b2012-12-02 12:52:36 +01002from test.test_support import precisionbigmemtest, _2G
Guido van Rossum8e0ce301997-07-11 19:34:44 +00003import re
Neal Norwitz94a9c092006-03-16 06:30:02 +00004from re import Scanner
Ezio Melotti46645632011-03-25 14:50:52 +02005import sys
6import string
7import traceback
Raymond Hettinger027bb632004-05-31 03:09:25 +00008from weakref import proxy
Guido van Rossum8e0ce301997-07-11 19:34:44 +00009
# Misc tests from Tim Peters' re.doc
11
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
Skip Montanaro8ed06da2003-04-24 19:43:18 +000016import unittest
Guido van Rossum8430c581998-04-03 21:47:12 +000017
Skip Montanaro8ed06da2003-04-24 19:43:18 +000018class ReTests(unittest.TestCase):
Raymond Hettinger027bb632004-05-31 03:09:25 +000019
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
Skip Montanaro8ed06da2003-04-24 19:43:18 +000026 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000031 self.assertEqual(re.search('x', 'aaa'), None)
Skip Montanaro8ed06da2003-04-24 19:43:18 +000032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
Skip Montanaro5ba00542003-04-25 16:00:14 +000036 self.assertEqual(re.match('a+', 'xxx'), None)
Guido van Rossum8430c581998-04-03 21:47:12 +000037
def bump_num(self, matchobj):
    # Replacement callback: parse the whole match as an integer and return
    # the next integer as a string.
    return str(int(matchobj.group(0)) + 1)
Guido van Rossum23b22571997-07-17 22:36:14 +000041
def test_basic_re_sub(self):
    # Basic re.sub() behaviour with string and callable replacements.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # The value returned by a callable is inserted as-is, whereas a
    # string template has its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Templates are raw strings so the backslash in '\g' does not depend
    # on Python passing unrecognised string escapes through unchanged.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Guido van Rossume056e4d2001-08-10 14:52:48 +000069
Skip Montanaro2726fcd2003-04-25 14:31:54 +000070 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
Skip Montanaro8ed06da2003-04-24 19:43:18 +000077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
Guido van Rossum23b22571997-07-17 22:36:14 +000085
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    combos = [(x, y) for x in ('x', u'x') for y in ('y', u'y')]
    for x, y in combos:
        from_empty_unicode = re.sub(x, y, u'')
        self.assertEqual(from_empty_unicode, u'')
        self.assertEqual(type(from_empty_unicode), unicode)

        from_empty_str = re.sub(x, y, '')
        self.assertEqual(from_empty_str, '')
        self.assertEqual(type(from_empty_str), str)

        from_unicode = re.sub(x, y, unicode(x))
        self.assertEqual(from_unicode, y)
        self.assertEqual(type(from_unicode), unicode)

        from_str = re.sub(x, y, str(x))
        self.assertEqual(from_str, y)
        self.assertEqual(type(from_str), type(y))
110
Raymond Hettinger80016c92007-12-19 18:13:31 +0000111 def test_bug_1661(self):
112 # Verify that flags do not get silently ignored with compiled patterns
113 pattern = re.compile('.')
114 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
115 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.compile, pattern, re.I)
118
Guido van Rossume3c4fd92008-09-10 14:27:00 +0000119 def test_bug_3629(self):
120 # A regex that triggered a bug in the sre-code validator
121 re.compile("(?P<quote>)(?(quote))")
122
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # Each template is substituted for the single 'x' and compared with
    # the expected literal result.
    escape_cases = [
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),

        (r'\400', '\0'),
        (r'\777', '\377'),
    ]
    for template, expected in escape_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # The pattern 'x' defines no groups; each of these templates is
    # expected to be rejected with re.error.
    rejected_templates = [
        r'\1',
        r'\8',
        r'\9',
        r'\11',
        r'\18',
        r'\1a',
        r'\90',
        r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in rejected_templates:
        self.assertRaises(re.error, re.sub, 'x', template, 'x')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    eleven_groups = '((((((((((y))))))))))(.)'
    self.assertEqual(re.sub(eleven_groups, r'\118', 'xyz'), 'xz8')
    self.assertEqual(re.sub(eleven_groups, r'\11a', 'xyz'), 'xza')
163 'xza')
164
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000165 def test_qualified_re_sub(self):
166 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
167 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
Guido van Rossum8430c581998-04-03 21:47:12 +0000168
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000169 def test_bug_114660(self):
170 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
171 'hello there')
172
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    for pattern, expected in (('x*', '-a-b-d-'), ('x+', 'ab-d')):
        self.assertEqual(re.sub(pattern, '-', 'abxd'), expected)
177
Ezio Melottief317382012-11-03 20:31:12 +0200178 def test_symbolic_groups(self):
179 re.compile('(?P<a>x)(?P=a)(?(a)y)')
180 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
181 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
182 self.assertRaises(re.error, re.compile, '(?Px)')
183 self.assertRaises(re.error, re.compile, '(?P=)')
184 self.assertRaises(re.error, re.compile, '(?P=1)')
185 self.assertRaises(re.error, re.compile, '(?P=a)')
186 self.assertRaises(re.error, re.compile, '(?P=a1)')
187 self.assertRaises(re.error, re.compile, '(?P=a.)')
188 self.assertRaises(re.error, re.compile, '(?P<)')
189 self.assertRaises(re.error, re.compile, '(?P<>)')
190 self.assertRaises(re.error, re.compile, '(?P<1>)')
191 self.assertRaises(re.error, re.compile, '(?P<a.>)')
192 self.assertRaises(re.error, re.compile, '(?())')
193 self.assertRaises(re.error, re.compile, '(?(a))')
194 self.assertRaises(re.error, re.compile, '(?(1a))')
195 self.assertRaises(re.error, re.compile, '(?(a.))')
196
def test_symbolic_refs(self):
    # Group references in a sub() template that are expected to raise.
    # Templates are raw strings so the backslash in '\g' does not depend
    # on Python passing unrecognised string escapes through unchanged.
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<a a>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<1a1>', 'xx')
    # A well-formed but unknown group name raises IndexError instead.
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', r'\g<ab>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', r'\2', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<-1>', 'xx')
Guido van Rossumf473cb01998-01-14 16:42:17 +0000208
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000209 def test_re_subn(self):
210 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
211 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
212 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
213 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
214 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
Guido van Rossum49946571997-07-18 04:26:25 +0000215
def test_re_split(self):
    # Every pattern splits the same subject; capturing groups in the
    # pattern show up in the result list.
    subject = ":a:b::c"
    cases = [
        (":", ['', 'a', 'b', '', 'c']),
        (":*", ['', 'a', 'b', 'c']),
        ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ("(?::*)", ['', 'a', 'b', 'c']),
        ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]
    for pattern, expected in cases:
        self.assertEqual(re.split(pattern, subject), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000231
def test_qualified_re_split(self):
    # split() with maxsplit=2: at most two splits are performed and the
    # remainder is returned as the last element.
    cases = [
        (":", ":a:b::c", ['', 'a', 'b::c']),
        (':', 'a:b:c:d', ['a', 'b', 'c:d']),
        ("(:)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
        ("(:*)", ":a:b::c", ['', ':', 'a', ':', 'b::c']),
    ]
    for pattern, subject, expected in cases:
        self.assertEqual(re.split(pattern, subject, 2), expected)
Guido van Rossum49946571997-07-18 04:26:25 +0000239
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000240 def test_re_findall(self):
241 self.assertEqual(re.findall(":+", "abc"), [])
242 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
243 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
244 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
245 (":", ":"),
246 (":", "::")])
Guido van Rossum49946571997-07-18 04:26:25 +0000247
Skip Montanaro5ba00542003-04-25 16:00:14 +0000248 def test_bug_117612(self):
249 self.assertEqual(re.findall(r"(a|(b))", "aba"),
250 [("a", ""),("b", "b"),("a", "")])
251
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000252 def test_re_match(self):
Skip Montanaro5ba00542003-04-25 16:00:14 +0000253 self.assertEqual(re.match('a', 'a').groups(), ())
254 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
255 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
256 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
257 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000258
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000259 pat = re.compile('((a)|(b))(c)?')
260 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
261 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
262 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
263 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
264 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
Guido van Rossum8430c581998-04-03 21:47:12 +0000265
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000266 # A single group
267 m = re.match('(a)', 'a')
268 self.assertEqual(m.group(0), 'a')
269 self.assertEqual(m.group(0), 'a')
270 self.assertEqual(m.group(1), 'a')
271 self.assertEqual(m.group(1, 1), ('a', 'a'))
Guido van Rossum49946571997-07-18 04:26:25 +0000272
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000273 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
274 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
275 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
276 (None, 'b', None))
277 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Guido van Rossum49946571997-07-18 04:26:25 +0000278
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000279 def test_re_groupref_exists(self):
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000280 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
281 ('(', 'a'))
282 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
283 (None, 'a'))
284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
285 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
286 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
287 ('a', 'b'))
288 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
289 (None, 'd'))
290 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
291 (None, 'd'))
292 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
293 ('a', ''))
294
Michael W. Hudsone7fa1af2005-06-03 13:55:58 +0000295 # Tests for bug #1177831: exercise groups other than the first group
296 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
297 self.assertEqual(p.match('abc').groups(),
298 ('a', 'b', 'c'))
299 self.assertEqual(p.match('ad').groups(),
300 ('a', None, 'd'))
301 self.assertEqual(p.match('abd'), None)
302 self.assertEqual(p.match('ac'), None)
303
Andrew M. Kuchling3554cad2005-06-02 13:38:45 +0000304
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000305 def test_re_groupref(self):
306 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
307 ('|', 'a'))
308 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
309 (None, 'a'))
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
311 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
312 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
313 ('a', 'a'))
314 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
315 (None, None))
316
317 def test_groupdict(self):
318 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
319 'first second').groupdict(),
320 {'first':'first', 'second':'second'})
321
322 def test_expand(self):
323 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
324 "first second")
325 .expand(r"\2 \1 \g<second> \g<first>"),
326 "second first second first")
327
328 def test_repeat_minmax(self):
329 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
330 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
331 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
332 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
333
334 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
335 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
336 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
342
343 self.assertEqual(re.match("^x{1}$", "xxx"), None)
344 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
345 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
346 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
347
348 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
349 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
350 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
355 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
356
Gustavo Niemeyer6fa0c5a2005-09-14 08:54:39 +0000357 self.assertEqual(re.match("^x{}$", "xxx"), None)
358 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
359
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000360 def test_getattr(self):
361 self.assertEqual(re.match("(a)", "a").pos, 0)
362 self.assertEqual(re.match("(a)", "a").endpos, 1)
363 self.assertEqual(re.match("(a)", "a").string, "a")
364 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
365 self.assertNotEqual(re.match("(a)", "a").re, None)
366
def test_special_escapes(self):
    # \b, \B and the character-class escapes with the default flags
    # (flags=0), re.LOCALE and re.UNICODE.
    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", flags).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", flags).group(1), "bx")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", flags).group(0), "1aa! a")

    # The same word-boundary searches on unicode subjects.
    self.assertEqual(re.search(r"\b(b.)\b",
                               u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               u"abc bcd bc abxd").group(1), "bx")

    # ^/$ under re.M versus \A/\Z, for str and unicode subjects.
    for bare, wrapped in (("abc", "\nabc\n"), (u"abc", u"\nabc\n")):
        self.assertEqual(re.search(r"^abc$", wrapped, re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", bare, re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", wrapped, re.M), None)
396
Ezio Melotti38ae5b22012-02-29 11:40:00 +0200397 def test_string_boundaries(self):
398 # See http://bugs.python.org/issue10713
399 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
400 "abc")
401 # There's a word boundary at the start of a string.
402 self.assertTrue(re.match(r"\b", "abc"))
403 # A non-empty string includes a non-boundary zero-length match.
404 self.assertTrue(re.search(r"\B", "abc"))
405 # There is no non-boundary match at the start of a string.
406 self.assertFalse(re.match(r"\B", "abc"))
407 # However, an empty string contains no word boundaries, and also no
408 # non-boundaries.
409 self.assertEqual(re.search(r"\B", ""), None)
410 # This one is questionable and different from the perlre behaviour,
411 # but describes current behavior.
412 self.assertEqual(re.search(r"\b", ""), None)
413 # A single word-character string has two boundaries, but no
414 # non-boundary gaps.
415 self.assertEqual(len(re.findall(r"\b", "a")), 2)
416 self.assertEqual(len(re.findall(r"\B", "a")), 0)
417 # If there are no words, there are no boundaries
418 self.assertEqual(len(re.findall(r"\b", " ")), 0)
419 self.assertEqual(len(re.findall(r"\b", " ")), 0)
420 # Can match around the whitespace.
421 self.assertEqual(len(re.findall(r"\B", " ")), 2)
422
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000423 def test_bigcharset(self):
424 self.assertEqual(re.match(u"([\u2222\u2223])",
425 u"\u2222").group(1), u"\u2222")
426 self.assertEqual(re.match(u"([\u2222\u2223])",
427 u"\u2222", re.UNICODE).group(1), u"\u2222")
428
Antoine Pitroub83ea142012-11-20 22:30:42 +0100429 def test_big_codesize(self):
430 # Issue #1160
431 r = re.compile('|'.join(('%d'%x for x in range(10000))))
432 self.assertIsNotNone(r.match('1000'))
433 self.assertIsNotNone(r.match('9999'))
434
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000435 def test_anyall(self):
436 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
437 "a\nb")
438 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
439 "a\n\nb")
440
441 def test_non_consuming(self):
442 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
443 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
444 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
445 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
446 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
447 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
448 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
449
450 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
451 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
452 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
453 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
454
455 def test_ignore_case(self):
Georg Brandl30de77b2008-08-24 18:11:07 +0000456 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
457 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000458 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
459 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
460 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
461 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
462 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
463 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
464 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
465 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
466
467 def test_category(self):
468 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
469
def test_getlower(self):
    # _sre.getlower() maps 'A' to 'a' under every flag setting checked.
    import _sre
    upper, lower = ord('A'), ord('a')
    for flags in (0, re.LOCALE, re.UNICODE):
        self.assertEqual(_sre.getlower(upper, flags), lower)

    for subject in ("ABC", u"ABC"):
        self.assertEqual(re.match("abc", subject, re.I).group(0), "ABC")
477 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
478
479 def test_not_literal(self):
480 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
481 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
482
483 def test_search_coverage(self):
484 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
485 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
486
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the matched
    # text and span equal `match` and `span`.  When both are omitted the
    # pattern must match the whole text.  Supplying only one of the two
    # raises ValueError.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    result = matcher(pattern, text)
    self.assertTrue(result)
    self.assertEqual(result.group(), match)
    self.assertEqual(result.span(), span)
499 self.assertEqual(m.span(), span)
Guido van Rossum49946571997-07-18 04:26:25 +0000500
def test_re_escape(self):
    # re.escape() on each of the first 256 code points as unicode:
    # alphanumerics are unchanged, NUL becomes '\\000', and everything
    # else is prefixed with a backslash.
    unescaped = string.ascii_letters + string.digits
    all_chars = u''.join(unichr(code) for code in range(256))
    for char in all_chars:
        if char in unescaped:
            expected = char
        elif char == u'\x00':
            expected = u'\\000'
        else:
            expected = u'\\' + char
        self.assertEqual(re.escape(char), expected)
        self.assertMatch(re.escape(char), char)
    self.assertMatch(re.escape(all_chars), all_chars)
512 self.assertMatch(re.escape(p), p)
513
def test_re_escape_byte(self):
    # Same checks as for unicode, on a byte string built from chr(0) to
    # chr(255).
    unescaped = (string.ascii_letters + string.digits).encode('ascii')
    all_bytes = ''.join(chr(code) for code in range(256))
    for byte in all_bytes:
        if byte in unescaped:
            expected = byte
        elif byte == b'\x00':
            expected = b'\\000'
        else:
            expected = b'\\' + byte
        self.assertEqual(re.escape(byte), expected)
        self.assertMatch(re.escape(byte), byte)
    self.assertMatch(re.escape(all_bytes), all_bytes)
525 self.assertMatch(re.escape(p), p)
526
def test_re_escape_non_ascii(self):
    # Non-ASCII unicode characters get a backslash prefix and the escaped
    # pattern still matches the original text.
    text = u'xxx\u2620\u2620\u2620xxx'
    escaped = re.escape(text)
    self.assertEqual(escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(escaped, text)
    skull_run = u'.%s+.' % re.escape(u'\u2620')
    self.assertMatch(skull_run, text,
                     u'x\u2620\u2620\u2620x', (2, 7), re.search)
533 u'x\u2620\u2620\u2620x', (2, 7), re.search)
534
def test_re_escape_non_ascii_bytes(self):
    # Each byte of a UTF-8 encoded non-ASCII character is escaped
    # separately.
    encoded = u'y\u2620y\u2620y'.encode('utf-8')
    escaped = re.escape(encoded)
    self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(escaped, encoded)
    skull = re.escape(u'\u2620'.encode('utf-8'))
    occurrences = re.findall(skull, encoded)
    self.assertEqual(len(occurrences), 2)
541 self.assertEqual(len(res), 2)
Guido van Rossum49946571997-07-18 04:26:25 +0000542
def test_pickling(self):
    # Run the round-trip check with pickle first, then cPickle; each
    # module is imported only when its turn comes.
    for module_name in ('pickle', 'cPickle'):
        self.pickle_test(__import__(module_name))
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    from sre import _compile
Skip Montanaro1e703c62003-04-25 15:40:28 +0000551
def pickle_test(self, pickle):
    # Round-trip a compiled pattern through the given pickle module and
    # check that the result compares equal to the original.
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
Guido van Rossum23b22571997-07-17 22:36:14 +0000557
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000558 def test_constants(self):
559 self.assertEqual(re.I, re.IGNORECASE)
560 self.assertEqual(re.L, re.LOCALE)
561 self.assertEqual(re.M, re.MULTILINE)
562 self.assertEqual(re.S, re.DOTALL)
563 self.assertEqual(re.X, re.VERBOSE)
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000564
def test_flags(self):
    # Compiling with each individual flag must yield a pattern object.
    for flag in (re.I, re.M, re.X, re.S, re.L):
        compiled = re.compile('^pattern$', flag)
        self.assertNotEqual(compiled, None)
Guido van Rossumf473cb01998-01-14 16:42:17 +0000568
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000569 def test_sre_character_literals(self):
570 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
571 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
572 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
573 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
574 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
575 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
576 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
577 self.assertRaises(re.error, re.match, "\911", "")
578
Gustavo Niemeyera01a2ee2004-09-03 17:06:10 +0000579 def test_sre_character_class_literals(self):
580 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
581 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
582 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
583 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
584 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
585 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
586 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
587 self.assertRaises(re.error, re.match, "[\911]", "")
588
Skip Montanaro7d9963f2003-04-25 14:12:40 +0000589 def test_bug_113254(self):
590 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
591 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
592 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
593
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000594 def test_bug_527371(self):
595 # bug described in patches 527371/672491
596 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
597 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
598 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
599 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
600 self.assertEqual(re.match("((a))", "a").lastindex, 1)
601
602 def test_bug_545855(self):
603 # bug 545855 -- This pattern failed to cause a compile error as it
604 # should, instead provoking a TypeError.
605 self.assertRaises(re.error, re.compile, 'foo[a-')
606
607 def test_bug_418626(self):
608 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
609 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
610 # pattern '*?' on a long string.
611 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
612 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
613 20003)
614 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000615 # non-simple '*?' still used to hit the recursion limit, before the
Tim Peters58eb11c2004-01-18 20:29:55 +0000616 # non-recursive scheme was implemented.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000617 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
Skip Montanaro2726fcd2003-04-25 14:31:54 +0000618
619 def test_bug_612074(self):
620 pat=u"["+re.escape(u"\u2039")+u"]"
621 self.assertEqual(re.compile(pat) and 1, 1)
622
Skip Montanaro1e703c62003-04-25 15:40:28 +0000623 def test_stack_overflow(self):
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000624 # nasty cases that used to overflow the straightforward recursive
Skip Montanaro1e703c62003-04-25 15:40:28 +0000625 # implementation of repeated groups.
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000626 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
627 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
628 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
Skip Montanaro1e703c62003-04-25 15:40:28 +0000629
630 def test_scanner(self):
631 def s_ident(scanner, token): return token
632 def s_operator(scanner, token): return "op%s" % token
633 def s_float(scanner, token): return float(token)
634 def s_int(scanner, token): return int(token)
635
636 scanner = Scanner([
637 (r"[a-zA-Z_]\w*", s_ident),
638 (r"\d+\.\d*", s_float),
639 (r"\d+", s_int),
640 (r"=|\+|-|\*|/", s_operator),
641 (r"\s+", None),
642 ])
643
Gustavo Niemeyer25fe0bf2003-06-20 00:25:14 +0000644 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
645
Skip Montanaro1e703c62003-04-25 15:40:28 +0000646 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
647 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
648 'op+', 'bar'], ''))
649
Skip Montanaro5ba00542003-04-25 16:00:14 +0000650 def test_bug_448951(self):
651 # bug 448951 (similar to 429357, but with single char match)
652 # (Also test greedy matches.)
653 for op in '','?','*':
654 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
655 (None, None))
656 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
657 ('a:', 'a'))
658
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000659 def test_bug_725106(self):
660 # capturing groups in alternatives in repeats
661 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
662 ('b', 'a'))
663 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
664 ('c', 'b'))
665 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
666 ('b', None))
667 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
668 ('b', None))
669 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
670 ('b', 'a'))
671 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
672 ('c', 'b'))
673 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
674 ('b', None))
675 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
676 ('b', None))
677
Gustavo Niemeyer3646ab92003-04-27 13:25:21 +0000678 def test_bug_725149(self):
679 # mark_stack_base restoring before restoring marks
680 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
681 ('a', None))
682 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
683 ('a', None, None))
684
Just van Rossum12723ba2003-07-02 20:03:04 +0000685 def test_bug_764548(self):
686 # bug 764548, re.compile() barfs on str/unicode subclasses
687 try:
688 unicode
689 except NameError:
690 return # no problem if we have no unicode
691 class my_unicode(unicode): pass
692 pat = re.compile(my_unicode("abc"))
693 self.assertEqual(pat.match("xyz"), None)
694
Skip Montanaro5ba00542003-04-25 16:00:14 +0000695 def test_finditer(self):
696 iter = re.finditer(r":+", "a:b::c:::d")
697 self.assertEqual([item.group(0) for item in iter],
698 [":", "::", ":::"])
699
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000700 def test_bug_926075(self):
701 try:
702 unicode
703 except NameError:
704 return # no problem if we have no unicode
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000705 self.assertTrue(re.compile('bug_926075') is not
Hye-Shik Chang9f62ecc2004-04-20 21:30:07 +0000706 re.compile(eval("u'bug_926075'")))
707
Martin v. Löwis7d9c6c72004-05-07 07:18:13 +0000708 def test_bug_931848(self):
709 try:
710 unicode
711 except NameError:
712 pass
713 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
714 self.assertEqual(re.compile(pattern).split("a.b.c"),
715 ['a','b','c'])
716
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000717 def test_bug_581080(self):
718 iter = re.finditer(r"\s", "a b")
719 self.assertEqual(iter.next().span(), (1,2))
720 self.assertRaises(StopIteration, iter.next)
721
722 scanner = re.compile(r"\s").scanner("a b")
723 self.assertEqual(scanner.search().span(), (1, 2))
724 self.assertEqual(scanner.search(), None)
725
726 def test_bug_817234(self):
727 iter = re.finditer(r".*", "asdf")
728 self.assertEqual(iter.next().span(), (0, 4))
729 self.assertEqual(iter.next().span(), (4, 4))
730 self.assertRaises(StopIteration, iter.next)
731
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000732 def test_bug_6561(self):
733 # '\d' should match characters in Unicode category 'Nd'
734 # (Number, Decimal Digit), but not those in 'Nl' (Number,
735 # Letter) or 'No' (Number, Other).
736 decimal_digits = [
737 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
738 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
739 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
740 ]
741 for x in decimal_digits:
742 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
743
744 not_decimal_digits = [
745 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
746 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
747 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
748 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
749 ]
750 for x in not_decimal_digits:
751 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
752
Raymond Hettinger01a807d2007-04-02 22:54:21 +0000753 def test_empty_array(self):
754 # SF buf 1647541
755 import array
756 for typecode in 'cbBuhHiIlLfd':
757 a = array.array(typecode)
758 self.assertEqual(re.compile("bla").match(a), None)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000759 self.assertEqual(re.compile("").match(a).groups(), ())
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000760
Guido van Rossumae04c332008-01-03 19:12:44 +0000761 def test_inline_flags(self):
762 # Bug #1700
763 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
764 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
765
766 p = re.compile(upper_char, re.I | re.U)
767 q = p.match(lower_char)
768 self.assertNotEqual(q, None)
769
770 p = re.compile(lower_char, re.I | re.U)
771 q = p.match(upper_char)
772 self.assertNotEqual(q, None)
773
774 p = re.compile('(?i)' + upper_char, re.U)
775 q = p.match(lower_char)
776 self.assertNotEqual(q, None)
777
778 p = re.compile('(?i)' + lower_char, re.U)
779 q = p.match(upper_char)
780 self.assertNotEqual(q, None)
781
782 p = re.compile('(?iu)' + upper_char)
783 q = p.match(lower_char)
784 self.assertNotEqual(q, None)
785
786 p = re.compile('(?iu)' + lower_char)
787 q = p.match(upper_char)
788 self.assertNotEqual(q, None)
789
Amaury Forgeot d'Arcd08a8eb2008-01-10 21:59:42 +0000790 def test_dollar_matches_twice(self):
791 "$ matches the end of string, and just before the terminating \n"
792 pattern = re.compile('$')
793 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
794 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
795 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
796
797 pattern = re.compile('$', re.MULTILINE)
798 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
799 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
800 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
801
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000802 def test_dealloc(self):
803 # issue 3299: check for segfault in debug build
804 import _sre
Ezio Melotti0e4e7322010-01-23 10:43:05 +0000805 # the overflow limit is different on wide and narrow builds and it
806 # depends on the definition of SRE_CODE (see sre.h).
807 # 2**128 should be big enough to overflow on both. For smaller values
808 # a RuntimeError is raised instead of OverflowError.
809 long_overflow = 2**128
Antoine Pitrouefdddd32010-01-14 17:25:24 +0000810 self.assertRaises(TypeError, re.finditer, "a", {})
811 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Guido van Rossumae04c332008-01-03 19:12:44 +0000812
Ezio Melottib56b6ff2012-03-13 01:25:40 +0200813 def test_compile(self):
814 # Test return value when given string and pattern as parameter
815 pattern = re.compile('random pattern')
816 self.assertIsInstance(pattern, re._pattern_type)
817 same_pattern = re.compile(pattern)
818 self.assertIsInstance(same_pattern, re._pattern_type)
819 self.assertIs(same_pattern, pattern)
820 # Test behaviour when not given a string or pattern as parameter
821 self.assertRaises(TypeError, re.compile, 0)
822
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@precisionbigmemtest(size=_2G, memuse=20)
def test_large(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    subject = 'a' * size
    at_end = re.search('$', subject)
    self.assertIsNotNone(at_end)
    self.assertEqual(at_end.start(), size)
    self.assertEqual(at_end.end(), size)
    # The empty pattern is expected to be replaced size + 1 times.
    replaced, count = re.subn('', '', subject)
    self.assertEqual(replaced, subject)
    self.assertEqual(count, size + 1)
836
837
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000838def run_re_tests():
Georg Brandla4f46e12010-02-07 17:03:15 +0000839 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000840 if verbose:
841 print 'Running re_tests test suite'
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000842 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000843 # To save time, only run the first and last 10 tests
844 #tests = tests[:10] + tests[-10:]
845 pass
Guido van Rossum8e0ce301997-07-11 19:34:44 +0000846
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000847 for t in tests:
848 sys.stdout.flush()
849 pattern = s = outcome = repl = expected = None
850 if len(t) == 5:
851 pattern, s, outcome, repl, expected = t
852 elif len(t) == 3:
853 pattern, s, outcome = t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000854 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000855 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
856
Guido van Rossum41360a41998-03-26 19:42:58 +0000857 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000858 obj = re.compile(pattern)
859 except re.error:
860 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
Guido van Rossum41360a41998-03-26 19:42:58 +0000861 else:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000862 print '=== Syntax error:', t
863 except KeyboardInterrupt: raise KeyboardInterrupt
864 except:
865 print '*** Unexpected error ***', t
866 if verbose:
867 traceback.print_exc(file=sys.stdout)
868 else:
Fredrik Lundh17741be2001-03-22 15:51:28 +0000869 try:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000870 result = obj.search(s)
871 except re.error, msg:
872 print '=== Unexpected exception', t, repr(msg)
873 if outcome == SYNTAX_ERROR:
874 # This should have been a syntax error; forget it.
875 pass
876 elif outcome == FAIL:
877 if result is None: pass # No match, as expected
878 else: print '=== Succeeded incorrectly', t
879 elif outcome == SUCCEED:
880 if result is not None:
881 # Matched, as expected, so now we compute the
882 # result string and compare it to our expected result.
883 start, end = result.span(0)
884 vardict={'found': result.group(0),
885 'groups': result.group(),
886 'flags': result.re.flags}
887 for i in range(1, 100):
888 try:
889 gi = result.group(i)
890 # Special hack because else the string concat fails:
891 if gi is None:
892 gi = "None"
893 except IndexError:
894 gi = "Error"
895 vardict['g%d' % i] = gi
896 for i in result.re.groupindex.keys():
897 try:
898 gi = result.group(i)
899 if gi is None:
900 gi = "None"
901 except IndexError:
902 gi = "Error"
903 vardict[i] = gi
904 repl = eval(repl, vardict)
905 if repl != expected:
906 print '=== grouping error', t,
907 print repr(repl) + ' should be ' + repr(expected)
908 else:
909 print '=== Failed incorrectly', t
910
911 # Try the match on a unicode string, and check that it
912 # still succeeds.
913 try:
914 result = obj.search(unicode(s, "latin-1"))
915 if result is None:
916 print '=== Fails on unicode match', t
917 except NameError:
918 continue # 1.5.2
919 except TypeError:
920 continue # unicode test case
921
922 # Try the match on a unicode pattern, and check that it
923 # still succeeds.
924 obj=re.compile(unicode(pattern, "latin-1"))
925 result = obj.search(s)
Fredrik Lundh17741be2001-03-22 15:51:28 +0000926 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000927 print '=== Fails on unicode pattern match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000928
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000929 # Try the match with the search area limited to the extent
930 # of the match and see if it still succeeds. \B will
931 # break (because it won't match at the end or start of a
932 # string), so we'll ignore patterns that feature it.
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000933
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000934 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
935 and result is not None:
936 obj = re.compile(pattern)
937 result = obj.search(s, result.start(0), result.end(0) + 1)
938 if result is None:
939 print '=== Failed on range-limited match', t
Fredrik Lundh1151a8c2000-08-08 16:47:42 +0000940
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000941 # Try the match with IGNORECASE enabled, and check that it
942 # still succeeds.
943 obj = re.compile(pattern, re.IGNORECASE)
944 result = obj.search(s)
Fred Drake132dce22000-12-12 23:11:42 +0000945 if result is None:
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000946 print '=== Fails on case-insensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000947
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000948 # Try the match with LOCALE enabled, and check that it
949 # still succeeds.
950 obj = re.compile(pattern, re.LOCALE)
951 result = obj.search(s)
952 if result is None:
953 print '=== Fails on locale-sensitive match', t
Guido van Rossumdfa67901997-12-08 17:12:06 +0000954
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000955 # Try the match with UNICODE locale enabled, and check
956 # that it still succeeds.
957 obj = re.compile(pattern, re.UNICODE)
958 result = obj.search(s)
959 if result is None:
960 print '=== Fails on unicode-sensitive match', t
Fredrik Lundh8e6d5712000-08-08 17:06:53 +0000961
def test_main():
    # Run the ReTests unittest cases, then the table-driven checks in
    # run_re_tests().
    run_unittest(ReTests)
    run_re_tests()
Skip Montanaro8ed06da2003-04-24 19:43:18 +0000965
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()