Blame - Lib/test/test_re.py - platform/external/python/cpython3

2014-02-03 20:59:59 +0100

[diff] [blame]

2

cpython_only, captured_stdout

Benjamin Peterson

e48944b

2012-03-07 14:50:25 -0600

[diff] [blame]

3

import io

Serhiy Storchaka

4659cc0

2014-10-31 00:53:49 +0200

[diff] [blame]

4

import locale

Guido van Rossum

8e0ce30

1997-07-11 19:34:44 +0000

[diff] [blame]

5

import re

Thomas Wouters

9ada3d6

2006-04-21 09:47:09 +0000

[diff] [blame]

6

from re import Scanner

Antoine Pitrou

2013-10-25 21:36:10 +0200

[diff] [blame]

7

import sre_compile

R David Murray

2013-04-14 13:00:54 -0400

[diff] [blame]

8

import sre_constants

Ezio Melotti

2011-03-25 14:08:44 +0200

[diff] [blame]

9

import sys

10

import string

11

import traceback

Antoine Pitrou

2013-10-25 21:36:10 +0200

[diff] [blame]

12

import unittest

Raymond Hettinger

027bb63

2004-05-31 03:09:25 +0000

[diff] [blame]

13

from weakref import proxy

Guido van Rossum

8e0ce30

1997-07-11 19:34:44 +0000

[diff] [blame]

14

Guido van Rossum

1997-07-17 22:36:14 +0000

[diff] [blame]

15

# Misc tests from Tim Peters' re.doc

16

Just van Rossum

6802c6e

2003-07-02 14:36:59 +0000

[diff] [blame]

17

# WARNING: Don't change details in these tests if you don't know

Ezio Melotti

42da663

2011-03-15 05:18:48 +0200

[diff] [blame]

18

# what you're doing. Some of these tests were carefully modeled to

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

19

# cover most of the code.

20

Serhiy Storchaka

2013-10-16 12:46:28 +0300

[diff] [blame]

21

class S(str):
    """str subclass whose indexing/slicing results are re-wrapped as S."""

    def __getitem__(self, index):
        # Let str do the lookup, then restore the subclass type on the result.
        piece = super().__getitem__(index)
        return S(piece)

24

25

class B(bytes):
    """bytes subclass whose indexing/slicing results are re-wrapped as B."""

    def __getitem__(self, index):
        # Let bytes do the lookup, then pass the result back through B().
        # NOTE: an integer index yields an int from bytes, which B() then
        # interprets as a length (zero-filled); slices yield bytes.
        piece = super().__getitem__(index)
        return B(piece)

28

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

29

class ReTests(unittest.TestCase):

Raymond Hettinger

027bb63

2004-05-31 03:09:25 +0000

[diff] [blame]

30

Serhiy Storchaka

2013-10-16 12:46:28 +0300

[diff] [blame]

31

def assertTypedEqual(self, actual, expect, msg=None):
    """Assert equality and, additionally, exact type identity of the leaves.

    *actual* and *expect* are first compared with assertEqual().  Then every
    non-tuple/list element of *expect* is paired with the corresponding
    element of *actual* and their types must be identical (not merely
    compatible).  *msg* is forwarded to the underlying assertions.
    """
    self.assertEqual(actual, expect, msg)

    def check_types(got, wanted):
        # Leaves are compared by exact type; containers are walked pairwise.
        if not isinstance(wanted, (tuple, list)):
            self.assertIs(type(got), type(wanted), msg)
            return
        for got_item, wanted_item in zip(got, wanted):
            check_types(got_item, wanted_item)

    check_types(actual, expect)

40

Serhiy Storchaka

2015-03-25 21:03:47 +0200

[diff] [blame]

41

def checkPatternError(self, pattern, errmsg, pos=None):
    """Assert that compiling *pattern* raises re.error with message *errmsg*.

    When *pos* is given, the error's ``pos`` attribute must equal it too.
    """
    with self.assertRaises(re.error) as caught:
        re.compile(pattern)
    # Report message/position mismatches per pattern.
    with self.subTest(pattern=pattern):
        exc = caught.exception
        self.assertEqual(exc.msg, errmsg)
        if pos is not None:
            self.assertEqual(exc.pos, pos)

49

50

def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
    """Assert that re.sub(pattern, repl, string) raises re.error.

    The error's message must equal *errmsg*; when *pos* is given, the
    error's ``pos`` attribute must equal it as well.
    """
    with self.assertRaises(re.error) as caught:
        re.sub(pattern, repl, string)
    # Report message/position mismatches per pattern/template pair.
    with self.subTest(pattern=pattern, repl=repl):
        exc = caught.exception
        self.assertEqual(exc.msg, errmsg)
        if pos is not None:
            self.assertEqual(exc.pos, pos)

58

Benjamin Peterson

e48944b

2012-03-07 14:50:25 -0600

[diff] [blame]

59

def test_keep_buffer(self):
    # See bug 14212
    buf = bytearray(b'x')
    matches = re.finditer(b'a', buf)
    # While the iterator exists, resizing the bytearray is expected to be
    # refused with BufferError.
    with self.assertRaises(BufferError):
        buf.extend(b'x' * 400)
    list(matches)
    # Once the iterator is gone (and collected), resizing must work again.
    del matches
    gc_collect()
    buf.extend(b'x' * 400)

Raymond Hettinger

2004-05-31 03:09:25 +0000

[diff] [blame]

70

def test_weakref(self):

71

s = 'QabbbcR'

72

x = re.compile('ab+c')

73

y = proxy(x)

74

self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))

75

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

76

def test_search_star_plus(self):

77

self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))

78

self.assertEqual(re.search('x*', 'axx').span(), (0, 0))

79

self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))

80

self.assertEqual(re.search('x+', 'axx').span(), (1, 3))

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

81

self.assertIsNone(re.search('x', 'aaa'))

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

82

self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))

83

self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))

84

self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))

85

self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

86

self.assertIsNone(re.match('a+', 'xxx'))

Guido van Rossum

8430c58

1998-04-03 21:47:12 +0000

[diff] [blame]

87

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

88

def bump_num(self, matchobj):
    """Replacement callback: return the matched integer plus one, as str."""
    return str(int(matchobj.group(0)) + 1)

Guido van Rossum

1997-07-17 22:36:14 +0000

[diff] [blame]

91

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

92

def test_basic_re_sub(self):
    # The result type follows the subject type family (str vs. bytes),
    # whatever str/bytes-like objects are passed in.
    self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
    self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
    self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
    self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
    self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
    for y in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')

    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    # Callable replacement, with and without a substitution limit.
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
                     '9.3 -3 23x99y')

    # A callable's return value is used verbatim; a template string has its
    # escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Raw strings: '\g' is not a valid Python string escape, so the former
    # non-raw spellings relied on a deprecated fallback.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    # The first template is raw (escapes handled by re); the next two are
    # deliberately non-raw (escapes handled by the Python compiler).
    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
    for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
        with self.subTest(c):
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)

    # Raw string: '\s' is not a valid Python string escape either.
    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')

Guido van Rossum

e056e4d

2001-08-10 14:52:48 +0000

[diff] [blame]

133

Skip Montanaro

2003-04-25 14:31:54 +0000

[diff] [blame]

134

def test_bug_449964(self):

135

# fails for group followed by other escape

136

self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),

137

'xx\bxx\b')

138

139

def test_bug_449000(self):

140

# Test for sub() on escaped characters

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

141

self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),

142

'abc\ndef\n')

143

self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),

144

'abc\ndef\n')

145

self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),

146

'abc\ndef\n')

147

self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),

148

'abc\ndef\n')

Guido van Rossum

1997-07-17 22:36:14 +0000

[diff] [blame]

149

Christian Heimes

5fb7c2a

2007-12-24 08:52:31 +0000

[diff] [blame]

150

def test_bug_1661(self):

151

# Verify that flags do not get silently ignored with compiled patterns

152

pattern = re.compile('.')

153

self.assertRaises(ValueError, re.match, pattern, 'A', re.I)

154

self.assertRaises(ValueError, re.search, pattern, 'A', re.I)

155

self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)

156

self.assertRaises(ValueError, re.compile, pattern, re.I)

157

Guido van Rossum

92f8f3e

2008-09-10 14:30:50 +0000

[diff] [blame]

158

def test_bug_3629(self):

159

# A regex that triggered a bug in the sre-code validator

160

re.compile("(?P<quote>)(?(quote))")

161

Gustavo Niemeyer

2004-09-03 17:06:10 +0000

[diff] [blame]

162

def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # (template, expected result) for substituting into the subject 'x'.
    octal_cases = (
        (r'\0', '\0'),
        (r'\000', '\000'),
        (r'\001', '\001'),
        (r'\008', '\0' + '8'),
        (r'\009', '\0' + '9'),
        (r'\111', '\111'),
        (r'\117', '\117'),
        (r'\377', '\377'),

        (r'\1111', '\1111'),
        (r'\1111', '\111' + '1'),

        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', '\0' + '8'),
        (r'\09', '\0' + '9'),
        (r'\0a', '\0' + 'a'),
    )
    for template, expected in octal_cases:
        self.assertEqual(re.sub('x', template, 'x'), expected)

    # Octal escapes above 0o377 are rejected.
    self.checkTemplateError('x', r'\400', 'x',
                            r'octal escape value \400 outside of '
                            r'range 0-0o377', 0)
    self.checkTemplateError('x', r'\777', 'x',
                            r'octal escape value \777 outside of '
                            r'range 0-0o377', 0)

    # The pattern 'x' defines no groups, so each of these is an invalid
    # group reference.
    bad_group_templates = (
        r'\1', r'\8', r'\9', r'\11', r'\18', r'\1a', r'\90', r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    )
    for template in bad_group_templates:
        self.checkTemplateError('x', template, 'x', 'invalid group reference')

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                     'xz8')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                     'xza')

208

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

209

def test_qualified_re_sub(self):

210

self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')

Serhiy Storchaka

2016-09-25 20:36:23 +0300

[diff] [blame]

211

self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')

Victor Stinner

2014-10-29 16:58:59 +0100

[diff] [blame]

212

self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')

Guido van Rossum

8430c58

1998-04-03 21:47:12 +0000

[diff] [blame]

213

Skip Montanaro

2003-04-25 14:31:54 +0000

[diff] [blame]

214

def test_bug_114660(self):

215

self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),

216

'hello there')

217

218

def test_bug_462270(self):

219

# Test for empty sub() behaviour, see SF bug #462270

220

self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')

221

self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')

222

Ezio Melotti

0941d9f

2012-11-03 20:33:08 +0200

[diff] [blame]

223

def test_symbolic_groups(self):
    # Valid uses of named groups: definition, (?P=name) backreference and
    # (?(name)...) conditional.
    re.compile('(?P<a>x)(?P=a)(?(a)y)')
    re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
    # Raw string so that \1 is a backreference to group 1; in the former
    # non-raw literal '\1' was the control character chr(1).
    re.compile(r'(?P<a1>x)\1(?(1)y)')
    self.checkPatternError('(?P<a>)(?P<a>)',
                           "redefinition of group name 'a' as group 2; "
                           "was group 1")
    self.checkPatternError('(?P<a>(?P=a))',
                           "cannot refer to an open group", 10)
    self.checkPatternError('(?Pxy)', 'unknown extension ?Px')
    self.checkPatternError('(?P<a>)(?P=a', 'missing ), unterminated name', 11)
    self.checkPatternError('(?P=', 'missing group name', 4)
    self.checkPatternError('(?P=)', 'missing group name', 4)
    self.checkPatternError('(?P=1)', "bad character in group name '1'", 4)
    self.checkPatternError('(?P=a)', "unknown group name 'a'")
    self.checkPatternError('(?P=a1)', "unknown group name 'a1'")
    self.checkPatternError('(?P=a.)', "bad character in group name 'a.'", 4)
    self.checkPatternError('(?P<)', 'missing >, unterminated name', 4)
    self.checkPatternError('(?P<a', 'missing >, unterminated name', 4)
    self.checkPatternError('(?P<', 'missing group name', 4)
    self.checkPatternError('(?P<>)', 'missing group name', 4)
    self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4)
    self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4)
    self.checkPatternError(r'(?(', 'missing group name', 3)
    self.checkPatternError(r'(?())', 'missing group name', 3)
    self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3)
    self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
    self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
    self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
    # New valid/invalid identifiers in Python 3
    re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
    re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
    self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
    # Support > 100 groups.
    pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
    pat = '(?:%s)(?(200)z|t)' % pat
    self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

Ezio Melotti

0941d9f

2012-11-03 20:33:08 +0200

[diff] [blame]

260

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

261

def test_symbolic_refs(self):
    # All templates are raw strings: '\g' is not a valid Python string
    # escape, so the former non-raw spellings relied on a deprecated
    # fallback.  The template text is unchanged.
    self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
                            'missing >, unterminated name', 3)
    self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
                            'missing group name', 3)
    self.checkTemplateError('(?P<a>x)', r'\g', 'xx', 'missing <', 2)
    self.checkTemplateError('(?P<a>x)', r'\g<a a>', 'xx',
                            "bad character in group name 'a a'", 3)
    self.checkTemplateError('(?P<a>x)', r'\g<>', 'xx',
                            'missing group name', 3)
    self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
                            "bad character in group name '1a1'", 3)
    self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
                            'invalid group reference')
    self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
                            'invalid group reference')
    with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
        re.sub('(?P<a>x)', r'\g<ab>', 'xx')
    # A reference to a group that did not participate in the match expands
    # to the empty string.
    self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
    self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
    self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
                            "bad character in group name '-1'", 3)
    # New valid/invalid identifiers in Python 3
    self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
    self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
    self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
                            "bad character in group name '©'", 3)
    # Support > 100 groups.
    pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
    self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')

Guido van Rossum

f473cb0

1998-01-14 16:42:17 +0000

[diff] [blame]

291

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

292

def test_re_subn(self):

293

self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))

294

self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))

295

self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))

296

self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))

Serhiy Storchaka

2016-09-25 20:36:23 +0300

[diff] [blame]

297

self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))

Victor Stinner

2014-10-29 16:58:59 +0100

[diff] [blame]

298

self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))

Guido van Rossum

1997-07-18 04:26:25 +0000

[diff] [blame]

299

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

300

def test_re_split(self):
    # str subjects (including a str subclass): results must be plain str.
    for subject in ":a:b::c", S(":a:b::c"):
        self.assertTypedEqual(re.split(":", subject),
                              ['', 'a', 'b', '', 'c'])
        self.assertTypedEqual(re.split(":+", subject),
                              ['', 'a', 'b', 'c'])
        self.assertTypedEqual(re.split("(:+)", subject),
                              ['', ':', 'a', ':', 'b', '::', 'c'])
    # bytes-like subjects: results must be plain bytes.
    for subject in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                    memoryview(b":a:b::c")):
        self.assertTypedEqual(re.split(b":", subject),
                              [b'', b'a', b'b', b'', b'c'])
        self.assertTypedEqual(re.split(b":+", subject),
                              [b'', b'a', b'b', b'c'])
        self.assertTypedEqual(re.split(b"(:+)", subject),
                              [b'', b':', b'a', b':', b'b', b'::', b'c'])
    # Three sets of non-ASCII characters as the fields.
    for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                    "\U0001d49c\U0001d49e\U0001d4b5"):
        subject = ":%s:%s::%s" % (a, b, c)
        self.assertEqual(re.split(":", subject), ['', a, b, '', c])
        self.assertEqual(re.split(":+", subject), ['', a, b, c])
        self.assertEqual(re.split("(:+)", subject),
                         ['', ':', a, ':', b, '::', c])

    # Grouping variants of the separator.
    self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:)+", ":a:b::c"),
                     ['', ':', 'a', ':', 'b', ':', 'c'])
    self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                     ['', ':', 'a', ':b::', 'c'])
    self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                     ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c'])
    self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                     ['', 'a', '', '', 'c'])

    # Separators for which split() is expected to emit a FutureWarning.
    warning_cases = [
        (':*', ['', 'a', 'b', 'c']),
        ('(?::*)', ['', 'a', 'b', 'c']),
        ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
        ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
    ]
    for sep, expected in warning_cases:
        with self.subTest(sep=sep), self.assertWarns(FutureWarning):
            self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

    # Separators for which split() is expected to raise ValueError.
    error_cases = [
        ('', [':a:b::c']),
        (r'\b', [':a:b::c']),
        (r'(?=:)', [':a:b::c']),
        (r'(?<=:)', [':a:b::c']),
    ]
    for sep, expected in error_cases:
        with self.subTest(sep=sep), self.assertRaises(ValueError):
            self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

352

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

353

def test_qualified_re_split(self):

Serhiy Storchaka

2016-09-25 20:36:23 +0300

[diff] [blame]

354

self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])

Victor Stinner

2014-10-29 16:58:59 +0100

[diff] [blame]

355

self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])

356

self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])

357

self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

358

['', ':', 'a', ':', 'b::c'])

Serhiy Storchaka

2015-02-03 11:04:19 +0200

[diff] [blame]

359

self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

360

['', ':', 'a', ':', 'b::c'])

Serhiy Storchaka

2015-02-03 11:04:19 +0200

[diff] [blame]

361

with self.assertWarns(FutureWarning):

362

self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),

363

['', ':', 'a', ':', 'b::c'])

Guido van Rossum

1997-07-18 04:26:25 +0000

[diff] [blame]

364

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

365

def test_re_findall(self):
    self.assertEqual(re.findall(":+", "abc"), [])
    # str subjects (including a str subclass): results must be plain str.
    for subject in "a:b::c:::d", S("a:b::c:::d"):
        self.assertTypedEqual(re.findall(":+", subject),
                              [":", "::", ":::"])
        self.assertTypedEqual(re.findall("(:+)", subject),
                              [":", "::", ":::"])
        self.assertTypedEqual(re.findall("(:)(:*)", subject),
                              [(":", ""), (":", ":"), (":", "::")])
    # bytes-like subjects: results must be plain bytes.
    for subject in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
                    memoryview(b"a:b::c:::d")):
        self.assertTypedEqual(re.findall(b":+", subject),
                              [b":", b"::", b":::"])
        self.assertTypedEqual(re.findall(b"(:+)", subject),
                              [b":", b"::", b":::"])
        self.assertTypedEqual(re.findall(b"(:)(:*)", subject),
                              [(b":", b""), (b":", b":"), (b":", b"::")])
    # The same shapes with three different non-ASCII characters.
    for ch in ("\xe0", "\u0430", "\U0001d49c"):
        doubled = ch * 2
        tripled = ch * 3
        subject = "a%sb%sc%sd" % (ch, doubled, tripled)
        self.assertEqual(re.findall("%s+" % ch, subject),
                         [ch, doubled, tripled])
        self.assertEqual(re.findall("(%s+)" % ch, subject),
                         [ch, doubled, tripled])
        self.assertEqual(re.findall("(%s)(%s*)" % (ch, ch), subject),
                         [(ch, ""), (ch, ch), (ch, doubled)])

Guido van Rossum

1997-07-18 04:26:25 +0000

[diff] [blame]

390

Skip Montanaro

5ba0054

2003-04-25 16:00:14 +0000

[diff] [blame]

391

def test_bug_117612(self):

392

self.assertEqual(re.findall(r"(a|(b))", "aba"),

393

[("a", ""),("b", "b"),("a", "")])

394

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

395

def test_re_match(self):
    # str subjects (including a str subclass).
    for string in 'a', S('a'):
        self.assertEqual(re.match('a', string).groups(), ())
        self.assertEqual(re.match('(a)', string).groups(), ('a',))
        self.assertEqual(re.match('(a)', string).group(0), 'a')
        self.assertEqual(re.match('(a)', string).group(1), 'a')
        self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
    # bytes-like subjects.
    for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
        self.assertEqual(re.match(b'a', string).groups(), ())
        self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
        self.assertEqual(re.match(b'(a)', string).group(0), b'a')
        self.assertEqual(re.match(b'(a)', string).group(1), b'a')
        self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
    # Three different non-ASCII characters.
    for a in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.match(a, a).groups(), ())
        self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
        self.assertEqual(re.match('(%s)' % a, a).group(0), a)
        self.assertEqual(re.match('(%s)' % a, a).group(1), a)
        self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))

    # Groups that do not participate are None, or the default passed to
    # groups().
    pat = re.compile('((a)|(b))(c)?')
    self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
    self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
    self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
    self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
    self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

    # A single group
    m = re.match('(a)', 'a')
    # group() with no argument is the whole match, the same as group(0);
    # this replaces a second, identical group(0) assertion.
    self.assertEqual(m.group(), 'a')
    self.assertEqual(m.group(0), 'a')
    self.assertEqual(m.group(1), 'a')
    self.assertEqual(m.group(1, 1), ('a', 'a'))

    # Groups addressed by number, by name, and by a mixture of both.
    pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                     (None, 'b', None))
    self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))

Guido van Rossum

1997-07-18 04:26:25 +0000

[diff] [blame]

434

Serhiy Storchaka

32eddc1

2013-11-23 23:20:30 +0200

[diff] [blame]

435

def test_re_fullmatch(self):
    # Issue 16203: Proposal: add re.fullmatch() method.
    self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
    # The longer alternative must be chosen so that the whole subject matches.
    for subject in "ab", S("ab"):
        self.assertEqual(re.fullmatch(r"a|ab", subject).span(), (0, 2))
    for subject in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
        self.assertEqual(re.fullmatch(br"a|ab", subject).span(), (0, 2))
    for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
        r = r"%s|%s" % (a, a + b)
        self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
    # Non-greedy repeats must still extend to the end of the subject.
    for regex, subject, where in ((r".*?$", "abc", (0, 3)),
                                  (r".*?", "abc", (0, 3)),
                                  (r"a.*?b", "ab", (0, 2)),
                                  (r"a.*?b", "abb", (0, 3)),
                                  (r"a.*?b", "axxb", (0, 4))):
        self.assertEqual(re.fullmatch(regex, subject).span(), where)
    # Patterns that cannot cover the whole subject.
    for regex, subject in ((r"a+", "ab"),
                           (r"abc$", "abc\n"),
                           (r"abc\Z", "abc\n"),
                           (r"(?m)abc$", "abc\n")):
        self.assertIsNone(re.fullmatch(regex, subject))
    # Lookahead and lookbehind assertions.
    for regex, subject, where in ((r"ab(?=c)cd", "abcd", (0, 4)),
                                  (r"ab(?<=b)cd", "abcd", (0, 4)),
                                  (r"(?=a|ab)ab", "ab", (0, 2))):
        self.assertEqual(re.fullmatch(regex, subject).span(), where)

    # pos/endpos on a compiled pattern: the [1, 3) window must be covered.
    for regex in (r"bc", r".*?$", r".*?"):
        self.assertEqual(
            re.compile(regex).fullmatch("abcd", pos=1, endpos=3).span(),
            (1, 3))

464

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

465

def test_re_groupref_exists(self):
    # Optional opening parenthesis; the conditional (?(1)...) requires the
    # closing one only if group 1 matched.  The escapes \( and \) are
    # restored here: with '$' in their place the pattern cannot match '(a)'
    # and the expected groups ('(', 'a') could never be produced.
    self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                     ('(', 'a'))
    self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                     (None, 'a'))
    self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'))
    self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a'))
    # Conditionals with both branches, and with an empty "yes" branch.
    self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                     ('a', 'b'))
    self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                     (None, 'd'))
    self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                     (None, 'd'))
    self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                     ('a', ''))

    # Tests for bug #1177831: exercise groups other than the first group
    p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
    self.assertEqual(p.match('abc').groups(),
                     ('a', 'b', 'c'))
    self.assertEqual(p.match('ad').groups(),
                     ('a', None, 'd'))
    self.assertIsNone(p.match('abd'))
    self.assertIsNone(p.match('ac'))

    # Support > 100 groups.
    pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
    pat = '(?:%s)(?(200)z)' % pat
    self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

    # Malformed conditionals.
    self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
    self.checkPatternError(r'()(?(1)a|b',
                           'missing ), unterminated subpattern', 2)
    self.checkPatternError(r'()(?(1)a|b|c)',
                           'conditional backref with more than '
                           'two branches', 10)

501

502

def test_re_groupref_overflow(self):
    # A group number equal to sre_constants.MAXGROUPS must be rejected both
    # in a replacement template and in a conditional.  The template is a
    # raw string because '\g' is not a valid Python string escape; the
    # text is unchanged.
    self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx',
                            'invalid group reference', 3)
    self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
                           'invalid group reference', 10)

507

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

508

def test_re_groupref(self):
    # Numeric backreferences (\1), including references to a group that
    # did not take part in the match.
    self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
                     ('|', 'a'))
    self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
                     (None, 'a'))
    self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
    self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
    self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
                     ('a', 'a'))
    self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
                     (None, None))

    # A backreference to the group that is still being parsed is an error.
    self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)

521

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

522

def test_groupdict(self):

523

self.assertEqual(re.match('(?P<first>first) (?P<second>second)',

524

'first second').groupdict(),

525

{'first':'first', 'second':'second'})

526

527

def test_expand(self):

528

self.assertEqual(re.match("(?P<first>first) (?P<second>second)",

529

"first second")

530

.expand(r"\2 \1 \g<second> \g<first>"),

531

"second first second first")

Serhiy Storchaka

7438e4b

2014-10-10 11:06:31 +0300

[diff] [blame]

532

self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",

533

"first")

534

.expand(r"\2 \g<second>"),

535

" ")

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

536

537

def test_repeat_minmax(self):
    # Bounded repeats ({m}, {m,n}) and their lazy forms, applied first to
    # a capturing group and then to a single literal.
    # Patterns containing '\w' are raw strings so the backslash is not an
    # invalid string escape.
    self.assertIsNone(re.match(r"^(\w){1}$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1}?$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1,2}$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1,2}?$", "abc"))

    # A repeated group keeps the text of its last iteration.
    self.assertEqual(re.match(r"^(\w){3}$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,3}$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,4}$", "abc").group(1), "c")
    # Greedy {3,4}: previously a second copy of the lazy case below, which
    # left the greedy form untested.
    self.assertEqual(re.match(r"^(\w){3,4}$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){3}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,3}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,4}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c")

    self.assertIsNone(re.match("^x{1}$", "xxx"))
    self.assertIsNone(re.match("^x{1}?$", "xxx"))
    self.assertIsNone(re.match("^x{1,2}$", "xxx"))
    self.assertIsNone(re.match("^x{1,2}?$", "xxx"))

    self.assertTrue(re.match("^x{3}$", "xxx"))
    self.assertTrue(re.match("^x{1,3}$", "xxx"))
    self.assertTrue(re.match("^x{3,3}$", "xxx"))
    self.assertTrue(re.match("^x{1,4}$", "xxx"))
    # Greedy {3,4}; see the note in the group section above.
    self.assertTrue(re.match("^x{3,4}$", "xxx"))
    self.assertTrue(re.match("^x{3}?$", "xxx"))
    self.assertTrue(re.match("^x{1,3}?$", "xxx"))
    self.assertTrue(re.match("^x{1,4}?$", "xxx"))
    self.assertTrue(re.match("^x{3,4}?$", "xxx"))

    # An empty '{}' is not a repeat: it matches the literal characters.
    self.assertIsNone(re.match("^x{}$", "xxx"))
    self.assertTrue(re.match("^x{}$", "x{}"))

    self.checkPatternError(r'x{2,1}',
                           'min repeat greater than max repeat', 2)

572

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

573

def test_getattr(self):

Amaury Forgeot d'Arc

e43d33a

2008-07-02 20:50:16 +0000

[diff] [blame]

574

self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

575

self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)

Amaury Forgeot d'Arc

e43d33a

2008-07-02 20:50:16 +0000

[diff] [blame]

576

self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)

577

self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})

578

self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,

579

{'first': 1, 'other': 2})

580

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

581

self.assertEqual(re.match("(a)", "a").pos, 0)

582

self.assertEqual(re.match("(a)", "a").endpos, 1)

583

self.assertEqual(re.match("(a)", "a").string, "a")

584

self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

585

self.assertTrue(re.match("(a)", "a").re)

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

586

Serhiy Storchaka

07360df

2015-03-30 01:01:48 +0300

[diff] [blame]

587

# Issue 14260. groupindex should be non-modifiable mapping.

588

p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')

589

self.assertEqual(sorted(p.groupindex), ['first', 'other'])

590

self.assertEqual(p.groupindex['other'], 2)

591

with self.assertRaises(TypeError):

592

p.groupindex['other'] = 0

593

self.assertEqual(p.groupindex['other'], 2)

594

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

595

def test_special_escapes(self):

596

self.assertEqual(re.search(r"\b(b.)\b",

597

"abcd abc bcd bx").group(1), "bx")

598

self.assertEqual(re.search(r"\B(b.)\B",

599

"abc bcd bc abxd").group(1), "bx")

600

self.assertEqual(re.search(r"\b(b.)\b",

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

601

"abcd abc bcd bx", re.ASCII).group(1), "bx")

602

self.assertEqual(re.search(r"\B(b.)\B",

603

"abc bcd bc abxd", re.ASCII).group(1), "bx")

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

604

self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")

605

self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

606

self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

607

self.assertEqual(re.search(br"\b(b.)\b",

608

b"abcd abc bcd bx").group(1), b"bx")

609

self.assertEqual(re.search(br"\B(b.)\B",

610

b"abc bcd bc abxd").group(1), b"bx")

611

self.assertEqual(re.search(br"\b(b.)\b",

612

b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")

613

self.assertEqual(re.search(br"\B(b.)\B",

614

b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")

615

self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")

616

self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

617

self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

618

self.assertEqual(re.search(r"\d\D\w\W\s\S",

619

"1aa! a").group(0), "1aa! a")

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

620

self.assertEqual(re.search(br"\d\D\w\W\s\S",

621

b"1aa! a").group(0), b"1aa! a")

622

self.assertEqual(re.search(r"\d\D\w\W\s\S",

623

"1aa! a", re.ASCII).group(0), "1aa! a")

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

624

self.assertEqual(re.search(br"\d\D\w\W\s\S",

625

b"1aa! a", re.LOCALE).group(0), b"1aa! a")

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

626

Serhiy Storchaka

2014-11-10 14:38:16 +0200

[diff] [blame]

627

def test_other_escapes(self):
    # Escaped punctuation matches itself, outside and inside a character
    # set; a lone trailing backslash is a pattern error.
    self.checkPatternError("\\", 'bad escape (end of pattern)', 0)
    self.assertEqual(re.match(r"\(", '(').group(), '(')
    self.assertIsNone(re.match(r"\(", ')'))
    self.assertEqual(re.match(r"\\", '\\').group(), '\\')
    self.assertEqual(re.match(r"[\]]", ']').group(), ']')
    self.assertIsNone(re.match(r"[\]]", '['))
    self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
    self.assertIsNone(re.match(r"[a\-c]", 'b'))
    self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
    self.assertIsNone(re.match(r"[\^a]+", 'b'))
    # NOTE(review): presumably clears the compiled-pattern cache so that
    # the warnings below are emitted again for already-seen patterns.
    re.purge()  # for warnings
    # Backslash + ASCII letter with no special meaning: expected to match
    # the letter itself while raising DeprecationWarning.
    for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
        with self.subTest(c):
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
                self.assertIsNone(re.match('\\%c' % c, 'a'))
    # Same check inside a character set (the letter list differs: it also
    # contains A, B and Z).
    for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
        with self.subTest(c):
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
                self.assertIsNone(re.match('[\\%c]' % c, 'a'))

Serhiy Storchaka

2014-11-10 14:38:16 +0200

[diff] [blame]

649

Ezio Melotti

5a045b9

2012-02-29 11:48:44 +0200

[diff] [blame]

650

def test_string_boundaries(self):

651

# See http://bugs.python.org/issue10713

652

self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),

653

"abc")

654

# There's a word boundary at the start of a string.

655

self.assertTrue(re.match(r"\b", "abc"))

656

# A non-empty string includes a non-boundary zero-length match.

657

self.assertTrue(re.search(r"\B", "abc"))

658

# There is no non-boundary match at the start of a string.

659

self.assertFalse(re.match(r"\B", "abc"))

660

# However, an empty string contains no word boundaries, and also no

661

# non-boundaries.

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

662

self.assertIsNone(re.search(r"\B", ""))

Ezio Melotti

5a045b9

2012-02-29 11:48:44 +0200

[diff] [blame]

663

# This one is questionable and different from the perlre behaviour,

664

# but describes current behavior.

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

665

self.assertIsNone(re.search(r"\b", ""))

Ezio Melotti

5a045b9

2012-02-29 11:48:44 +0200

[diff] [blame]

666

# A single word-character string has two boundaries, but no

667

# non-boundary gaps.

668

self.assertEqual(len(re.findall(r"\b", "a")), 2)

669

self.assertEqual(len(re.findall(r"\B", "a")), 0)

670

# If there are no words, there are no boundaries

671

self.assertEqual(len(re.findall(r"\b", " ")), 0)

672

self.assertEqual(len(re.findall(r"\b", " ")), 0)

673

# Can match around the whitespace.

674

self.assertEqual(len(re.findall(r"\B", " ")), 2)

675

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

676

def test_bigcharset(self):

Guido van Rossum

ef87d6e

2007-05-02 19:09:54 +0000

[diff] [blame]

677

self.assertEqual(re.match("([\u2222\u2223])",

678

"\u2222").group(1), "\u2222")

Serhiy Storchaka

be80fc9

2013-10-24 22:02:58 +0300

[diff] [blame]

679

r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

680

self.assertEqual(re.match(r, "\uff01").group(), "\uff01")

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

681

Antoine Pitrou

39bdad8

2012-11-20 22:30:42 +0100

[diff] [blame]

682

def test_big_codesize(self):

683

# Issue #1160

684

r = re.compile('|'.join(('%d'%x for x in range(10000))))

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

685

self.assertTrue(r.match('1000'))

686

self.assertTrue(r.match('9999'))

Antoine Pitrou

39bdad8

2012-11-20 22:30:42 +0100

[diff] [blame]

687

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

688

def test_anyall(self):

689

self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),

690

"a\nb")

691

self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),

692

"a\n\nb")

693

Serhiy Storchaka

4eea62f

2015-02-21 10:07:35 +0200

[diff] [blame]

694

def test_lookahead(self):

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

695

self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")

696

self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")

697

self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")

698

self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")

699

self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")

700

self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")

701

self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")

702

703

self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")

704

self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")

705

self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")

706

self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")

707

Serhiy Storchaka

4eea62f

2015-02-21 10:07:35 +0200

[diff] [blame]

708

# Group reference.

709

self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))

710

self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))

711

# Conditional group reference.

712

self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))

713

self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))

714

self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))

715

self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))

716

self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))

717

# Group used before defined.

718

self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))

719

self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))

720

self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))

721

722

def test_lookbehind(self):

723

self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))

724

self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))

725

self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))

726

self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))

727

# Group reference.

728

self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))

729

self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))

730

self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))

731

self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))

732

# Conditional group reference.

733

self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))

734

self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))

735

self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))

736

self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))

737

self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))

738

# Group used before defined.

739

self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')

740

self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))

741

self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))

742

# Group defined in the same lookbehind pattern

743

self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')

744

self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')

745

self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')

746

self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')

747

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

748

def test_ignore_case(self):

Benjamin Peterson

a786b02

2008-08-25 21:05:21 +0000

[diff] [blame]

749

self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

750

self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

751

self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")

752

self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")

753

self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")

754

self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")

755

self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")

756

self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")

757

self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")

758

self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")

759

Serhiy Storchaka

0c938f6

2014-11-10 12:37:16 +0200

[diff] [blame]

760

assert '\u212a'.lower() == 'k' # 'K'

761

self.assertTrue(re.match(r'K', '\u212a', re.I))

762

self.assertTrue(re.match(r'k', '\u212a', re.I))

763

self.assertTrue(re.match(r'\u212a', 'K', re.I))

764

self.assertTrue(re.match(r'\u212a', 'k', re.I))

765

assert '\u017f'.upper() == 'S' # 'ſ'

766

self.assertTrue(re.match(r'S', '\u017f', re.I))

767

self.assertTrue(re.match(r's', '\u017f', re.I))

768

self.assertTrue(re.match(r'\u017f', 'S', re.I))

769

self.assertTrue(re.match(r'\u017f', 's', re.I))

770

assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ'

771

self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))

772

self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))

773

774

def test_ignore_case_set(self):

775

self.assertTrue(re.match(r'[19A]', 'A', re.I))

776

self.assertTrue(re.match(r'[19a]', 'a', re.I))

777

self.assertTrue(re.match(r'[19a]', 'A', re.I))

778

self.assertTrue(re.match(r'[19A]', 'a', re.I))

779

self.assertTrue(re.match(br'[19A]', b'A', re.I))

780

self.assertTrue(re.match(br'[19a]', b'a', re.I))

781

self.assertTrue(re.match(br'[19a]', b'A', re.I))

782

self.assertTrue(re.match(br'[19A]', b'a', re.I))

783

assert '\u212a'.lower() == 'k' # 'K'

784

self.assertTrue(re.match(r'[19K]', '\u212a', re.I))

785

self.assertTrue(re.match(r'[19k]', '\u212a', re.I))

786

self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))

787

self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))

788

assert '\u017f'.upper() == 'S' # 'ſ'

789

self.assertTrue(re.match(r'[19S]', '\u017f', re.I))

790

self.assertTrue(re.match(r'[19s]', '\u017f', re.I))

791

self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))

792

self.assertTrue(re.match(r'[19\u017f]', 's', re.I))

793

assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ'

794

self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))

795

self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))

796

Serhiy Storchaka

4b8f894

2014-10-31 12:36:56 +0200

[diff] [blame]

797

def test_ignore_case_range(self):

798

# Issues #3511, #17381.

799

self.assertTrue(re.match(r'[9-a]', '_', re.I))

800

self.assertIsNone(re.match(r'[9-A]', '_', re.I))

801

self.assertTrue(re.match(br'[9-a]', b'_', re.I))

802

self.assertIsNone(re.match(br'[9-A]', b'_', re.I))

803

self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))

804

self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))

805

self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))

806

self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))

807

self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))

808

self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))

809

self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))

810

self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))

811

self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))

812

self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))

813

self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))

814

self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))

815

Serhiy Storchaka

0c938f6

2014-11-10 12:37:16 +0200

[diff] [blame]

816

assert '\u212a'.lower() == 'k' # 'K'

817

self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))

818

self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))

819

self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))

820

self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))

821

assert '\u017f'.upper() == 'S' # 'ſ'

822

self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))

823

self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))

824

self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))

825

self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))

826

assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ'

827

self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))

828

self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))

829

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

830

def test_category(self):

831

self.assertEqual(re.match(r"(\s)", " ").group(1), " ")

832

833

def test_getlower(self):

834

import _sre

835

self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))

836

self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))

837

self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

Serhiy Storchaka

2014-12-01 11:50:07 +0200

[diff] [blame]

838

self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

839

840

self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

841

self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")

Serhiy Storchaka

2014-12-01 11:50:07 +0200

[diff] [blame]

842

self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")

843

self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

844

845

def test_not_literal(self):

846

self.assertEqual(re.search("\s([^a])", " b").group(1), "b")

847

self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")

848

849

def test_search_coverage(self):

850

self.assertEqual(re.search("\s(b)", " b").group(1), "b")

851

self.assertEqual(re.search("a\s", "a ").group(0), "a ")

852

Ezio Melotti

2011-03-25 14:08:44 +0200

[diff] [blame]

853

def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    """Assert that *pattern* matches *text* with the expected result.

    *matcher* is called as ``matcher(pattern, text)``.  Its result must be
    truthy, ``group()`` must equal *match* and ``span()`` must equal
    *span*.  When both *match* and *span* are omitted, the pattern is
    expected to match the whole of *text*.

    Raises ValueError when only one of *match* and *span* is given.
    """
    if match is None and span is None:
        # the pattern matches the whole text
        match = text
        span = (0, len(text))
    elif match is None or span is None:
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    m = matcher(pattern, text)
    self.assertTrue(m)
    self.assertEqual(m.group(), match)
    self.assertEqual(m.span(), span)

Guido van Rossum

1997-07-18 04:26:25 +0000

[diff] [blame]

866

Ezio Melotti

2011-03-25 14:08:44 +0200

[diff] [blame]

867

def test_re_escape(self):
    # re.escape() on each of the first 256 code points: ASCII letters,
    # digits and '_' are expected unchanged, NUL as '\000', and everything
    # else prefixed with a backslash.  The escaped form must match the
    # original character.
    alnum_chars = string.ascii_letters + string.digits + '_'
    p = ''.join(chr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            self.assertEqual(re.escape(c), c)
        elif c == '\x00':
            self.assertEqual(re.escape(c), '\\000')
        else:
            self.assertEqual(re.escape(c), '\\' + c)
        self.assertMatch(re.escape(c), c)
    # The whole 256-character string must round-trip as well.
    self.assertMatch(re.escape(p), p)

Guido van Rossum

1997-07-18 04:26:25 +0000

[diff] [blame]

879

Guido van Rossum

2008-09-10 17:44:35 +0000

[diff] [blame]

880

def test_re_escape_byte(self):
    # Bytes counterpart of test_re_escape: every byte value 0..255.
    alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
    p = bytes(range(256))
    for i in p:
        # Iterating bytes yields ints; rebuild the one-byte object.
        b = bytes([i])
        if b in alnum_chars:
            self.assertEqual(re.escape(b), b)
        elif i == 0:
            self.assertEqual(re.escape(b), b'\\000')
        else:
            self.assertEqual(re.escape(b), b'\\' + b)
        self.assertMatch(re.escape(b), b)
    # The whole 256-byte string must round-trip as well.
    self.assertMatch(re.escape(p), p)

Guido van Rossum

2008-09-10 17:44:35 +0000

[diff] [blame]

893

Ezio Melotti

7b9e97b

2011-03-25 14:09:33 +0200

[diff] [blame]

894

def test_re_escape_non_ascii(self):
    # re.escape() on a str containing non-ASCII characters (U+2620): each
    # one is expected to be backslash-escaped and still match itself.
    s = 'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    # Searching for one-or-more U+2620 with one character on either side.
    self.assertMatch('.%s+.' % re.escape('\u2620'), s,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)

901

902

def test_re_escape_non_ascii_bytes(self):
    # re.escape() on UTF-8 encoded bytes: every non-ASCII byte of the
    # encoded U+2620 is expected to be backslash-escaped individually.
    b = 'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    # The escaped encoding of U+2620 occurs twice in b.
    res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
    self.assertEqual(len(res), 2)

Guido van Rossum

2008-09-10 17:44:35 +0000

[diff] [blame]

909

Serhiy Storchaka

b85a976

2014-09-15 11:33:19 +0300

[diff] [blame]

910

def test_pickling(self):

911

import pickle

912

oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)

913

for proto in range(pickle.HIGHEST_PROTOCOL + 1):

914

pickled = pickle.dumps(oldpat, proto)

915

newpat = pickle.loads(pickled)

916

self.assertEqual(newpat, oldpat)

917

# current pickle expects the _compile() reconstructor in re module

918

from re import _compile

Guido van Rossum

1997-07-17 22:36:14 +0000

[diff] [blame]

919

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

920

def test_constants(self):

921

self.assertEqual(re.I, re.IGNORECASE)

922

self.assertEqual(re.L, re.LOCALE)

923

self.assertEqual(re.M, re.MULTILINE)

924

self.assertEqual(re.S, re.DOTALL)

925

self.assertEqual(re.X, re.VERBOSE)

Fredrik Lundh

1151a8c

2000-08-08 16:47:42 +0000

[diff] [blame]

926

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

927

def test_flags(self):

Serhiy Storchaka

2014-12-01 11:50:07 +0200

[diff] [blame]

928

for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

929

self.assertTrue(re.compile('^pattern$', flag))

Serhiy Storchaka

2014-12-01 11:50:07 +0200

[diff] [blame]

930

for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:

931

self.assertTrue(re.compile(b'^pattern$', flag))

Guido van Rossum

f473cb0

1998-01-14 16:42:17 +0000

[diff] [blame]

932

Skip Montanaro

7d9963f

2003-04-25 14:12:40 +0000

[diff] [blame]

933

def test_sre_character_literals(self):
    # Octal, \x, \u and \U escapes in str patterns, each also followed by
    # a character that must not be absorbed into the escape.
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        if i < 256:
            # Three-digit octal and two-digit hex only reach 0xFF.
            self.assertTrue(re.match(r"\%03o" % i, chr(i)))
            self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
            self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
            self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
        if i < 0x10000:
            # \u takes exactly four hex digits.
            self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
            self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
        self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
        self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
        self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
    self.assertTrue(re.match(r"\0", "\000"))
    self.assertTrue(re.match(r"\08", "\0008"))
    self.assertTrue(re.match(r"\01", "\001"))
    self.assertTrue(re.match(r"\018", "\0018"))
    self.checkPatternError(r"\567",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 0)
    self.checkPatternError(r"\911", 'invalid group reference', 0)
    self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
    self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
    self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
    self.checkPatternError(r"\u123z", r'incomplete escape \u123', 0)
    self.checkPatternError(r"\U0001234", r'incomplete escape \U0001234', 0)
    self.checkPatternError(r"\U0001234z", r'incomplete escape \U0001234', 0)
    self.checkPatternError(r"\U00110000", r'bad escape \U00110000', 0)

Skip Montanaro

7d9963f

2003-04-25 14:12:40 +0000

[diff] [blame]

964

Gustavo Niemeyer

2004-09-03 17:06:10 +0000

[diff] [blame]

965

def test_sre_character_class_literals(self):
    # Octal, \x, \u and \U escapes inside a character set of a str
    # pattern, alone and followed by one more set member.
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        if i < 256:
            # Octal and two-digit hex only reach 0xFF.
            self.assertTrue(re.match(r"[\%o]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%o8]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
        if i < 0x10000:
            # \u takes exactly four hex digits.
            self.assertTrue(re.match(r"[\u%04x]" % i, chr(i)))
            self.assertTrue(re.match(r"[\u%04x0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\u%04xz]" % i, chr(i)))
        self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
        self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
        self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
    self.checkPatternError(r"[\567]",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 1)
    self.checkPatternError(r"[\911]", r'bad escape \9', 1)
    self.checkPatternError(r"[\x1z]", r'incomplete escape \x1', 1)
    self.checkPatternError(r"[\u123z]", r'incomplete escape \u123', 1)
    self.checkPatternError(r"[\U0001234z]", r'incomplete escape \U0001234', 1)
    self.checkPatternError(r"[\U00110000]", r'bad escape \U00110000', 1)
    # A range whose endpoints are both written as \U escapes.
    self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))

Antoine Pitrou

2012-06-23 13:29:19 +0200

[diff] [blame]

992

993

def test_sre_byte_literals(self):
    # Octal and \x escapes in bytes patterns, each also followed by a byte
    # that must not be absorbed into the escape.
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        self.assertTrue(re.match((r"\%03o" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
        self.assertTrue(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
        self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
        self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
    # In a bytes pattern \u and \U are expected to match the literal
    # letter and digits, with a DeprecationWarning.
    with self.assertWarns(DeprecationWarning):
        self.assertTrue(re.match(br"\u1234", b'u1234'))
    with self.assertWarns(DeprecationWarning):
        self.assertTrue(re.match(br"\U00012345", b'U00012345'))
    self.assertTrue(re.match(br"\0", b"\000"))
    self.assertTrue(re.match(br"\08", b"\0008"))
    self.assertTrue(re.match(br"\01", b"\001"))
    self.assertTrue(re.match(br"\018", b"\0018"))
    self.checkPatternError(br"\567",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 0)
    self.checkPatternError(br"\911", 'invalid group reference', 0)
    self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
    self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)

Antoine Pitrou

2012-06-23 13:29:19 +0200

[diff] [blame]

1015

1016

def test_sre_byte_class_literals(self):
    # Numeric escapes (octal and hex) in bytes patterns, inside a
    # character class.  Every class below must accept the escaped byte.
    class_templates = (
        r"[\%o]",
        r"[\%o8]",
        r"[\%03o]",
        r"[\%03o0]",
        r"[\%03o8]",
        r"[\x%02x]",
        r"[\x%02x0]",
        r"[\x%02xz]",
    )
    for value in (0, 8, 16, 32, 64, 127, 128, 255):
        single = bytes([value])
        for template in class_templates:
            pattern = (template % value).encode()
            self.assertTrue(re.match(pattern, single))

    # \u and \U inside a bytes class: a DeprecationWarning is expected
    # and the class matches the plain letter.
    for pattern, letter in ((br"[\u1234]", b'u'),
                            (br"[\U00012345]", b'U')):
        with self.assertWarns(DeprecationWarning):
            self.assertTrue(re.match(pattern, letter))

    # Malformed escapes.  checkPatternError is defined elsewhere in the
    # class; its last argument is presumably the expected error position.
    out_of_range = (r'octal escape value \567 outside of '
                    r'range 0-0o377')
    self.checkPatternError(br"[\567]", out_of_range, 1)
    self.checkPatternError(br"[\911]", r'bad escape \9', 1)
    self.checkPatternError(br"[\x1z]", r'incomplete escape \x1', 1)

1035

1036

def test_character_set_errors(self):
    # Malformed character sets must be reported as pattern errors.
    # checkPatternError is defined elsewhere in the class; its last
    # argument is presumably the expected error position.
    unterminated = 'unterminated character set'
    for pattern in (r'[', r'[^', r'[a'):
        self.checkPatternError(pattern, unterminated, 0)
    # bug 545855 -- This pattern failed to cause a compile error as it
    # should, instead provoking a TypeError.
    self.checkPatternError(r"[a-", unterminated, 0)
    for pattern, message in (
            (r"[\w-b]", r'bad character range \w-b'),
            (r"[a-\w]", r'bad character range a-\w'),
            (r"[b-a]", 'bad character range b-a'),
    ):
        self.checkPatternError(pattern, message, 1)

Gustavo Niemeyer

2004-09-03 17:06:10 +0000

[diff] [blame]

1046

Skip Montanaro

7d9963f

2003-04-25 14:12:40 +0000

[diff] [blame]

1047

def test_bug_113254(self):

1048

self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)

1049

self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)

1050

self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))

1051

Skip Montanaro

2003-04-25 14:31:54 +0000

[diff] [blame]

1052

def test_bug_527371(self):

1053

# bug described in patches 527371/672491

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1054

self.assertIsNone(re.match(r'(a)?a','a').lastindex)

Skip Montanaro

2003-04-25 14:31:54 +0000

[diff] [blame]

1055

self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)

1056

self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')

1057

self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')

1058

self.assertEqual(re.match("((a))", "a").lastindex, 1)

1059

Skip Montanaro

2003-04-25 14:31:54 +0000

[diff] [blame]

1060

def test_bug_418626(self):

1061

# bugs 418626 at al. -- Testing Greg Chapman's addition of op code

1062

# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of

1063

# pattern '*?' on a long string.

1064

self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)

1065

self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),

1066

20003)

1067

self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)

Gustavo Niemeyer

2003-10-17 22:13:16 +0000

[diff] [blame]

1068

# non-simple '*?' still used to hit the recursion limit, before the

Tim Peters

58eb11c

2004-01-18 20:29:55 +0000

[diff] [blame]

1069

# non-recursive scheme was implemented.

Gustavo Niemeyer

2003-10-17 22:13:16 +0000

[diff] [blame]

1070

self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)

Skip Montanaro

2003-04-25 14:31:54 +0000

[diff] [blame]

1071

1072

def test_bug_612074(self):

Guido van Rossum

ef87d6e

2007-05-02 19:09:54 +0000

[diff] [blame]

1073

pat="["+re.escape("\u2039")+"]"

Skip Montanaro

2003-04-25 14:31:54 +0000

[diff] [blame]

1074

self.assertEqual(re.compile(pat) and 1, 1)

1075

Skip Montanaro

2003-04-25 15:40:28 +0000

[diff] [blame]

1076

def test_stack_overflow(self):

Gustavo Niemeyer

2003-10-17 22:13:16 +0000

[diff] [blame]

1077

# nasty cases that used to overflow the straightforward recursive

Skip Montanaro

2003-04-25 15:40:28 +0000

[diff] [blame]

1078

# implementation of repeated groups.

Gustavo Niemeyer

2003-10-17 22:13:16 +0000

[diff] [blame]

1079

self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')

1080

self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')

1081

self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')

Skip Montanaro

2003-04-25 15:40:28 +0000

[diff] [blame]

1082

Serhiy Storchaka

2015-03-25 21:03:47 +0200

[diff] [blame]

1083

def test_nothing_to_repeat(self):
    # A repeat operator with nothing in front of it is an error, both at
    # the very start of a pattern and at the start of a group.
    # checkPatternError is defined elsewhere in the class; its last
    # argument is presumably the expected error position.
    message = 'nothing to repeat'
    for reps in ('*', '+', '?', '{1,2}'):
        for mod in ('', '?'):
            operator = reps + mod
            self.checkPatternError(operator, message, 0)
            self.checkPatternError('(?:' + operator + ')', message, 3)

1090

1091

def test_multiple_repeat(self):
    # A repeat operator applied directly to another repeat operator is
    # an error; the third argument handed to checkPatternError (defined
    # elsewhere in the class) is 1 + len(inner_op), i.e. the offset just
    # past 'x' and the inner operator.
    outer_ops = [reps + mod
                 for reps in ('*', '+', '{1,2}')
                 for mod in ('', '?')]
    inner_ops = [reps + mod
                 for reps in ('*', '+', '?', '{1,2}')
                 for mod in ('', '?')]
    for outer_op in outer_ops:
        for inner_op in inner_ops:
            pattern = 'x' + inner_op + outer_op
            self.checkPatternError(pattern, 'multiple repeat',
                                   1 + len(inner_op))

1100

Serhiy Storchaka

fa46816

2013-02-16 21:23:53 +0200

[diff] [blame]

1101

def test_unlimited_zero_width_repeat(self):

1102

# Issue #9669

1103

self.assertIsNone(re.match(r'(?:a?)*y', 'z'))

1104

self.assertIsNone(re.match(r'(?:a?)+y', 'z'))

1105

self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))

1106

self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))

1107

self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))

1108

self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))

1109

Skip Montanaro

2003-04-25 15:40:28 +0000

[diff] [blame]

1110

def test_scanner(self):

1111

def s_ident(scanner, token): return token

1112

def s_operator(scanner, token): return "op%s" % token

1113

def s_float(scanner, token): return float(token)

1114

def s_int(scanner, token): return int(token)

1115

1116

scanner = Scanner([

1117

(r"[a-zA-Z_]\w*", s_ident),

1118

(r"\d+\.\d*", s_float),

1119

(r"\d+", s_int),

1120

(r"=|\+|-|\*|/", s_operator),

(r"\s+", None),

])

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1124

self.assertTrue(scanner.scanner.scanner("").pattern)

Gustavo Niemeyer

2003-06-20 00:25:14 +0000

[diff] [blame]

1125

Skip Montanaro

2003-04-25 15:40:28 +0000

[diff] [blame]

1126

self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),

1127

(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,

1128

'op+', 'bar'], ''))

1129

Skip Montanaro

5ba0054

2003-04-25 16:00:14 +0000

[diff] [blame]

1130

def test_bug_448951(self):

1131

# bug 448951 (similar to 429357, but with single char match)

1132

# (Also test greedy matches.)

1133

for op in '','?','*':

1134

self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),

1135

(None, None))

1136

self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),

1137

('a:', 'a'))

1138

Gustavo Niemeyer

c34f255

2003-04-27 12:34:14 +0000

[diff] [blame]

1139

def test_bug_725106(self):

1140

# capturing groups in alternatives in repeats

1141

self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),

1142

('b', 'a'))

1143

self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),

1144

('c', 'b'))

1145

self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),

1146

('b', None))

1147

self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),

1148

('b', None))

1149

self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),

1150

('b', 'a'))

1151

self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),

1152

('c', 'b'))

1153

self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),

1154

('b', None))

1155

self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),

1156

('b', None))

1157

Gustavo Niemeyer

3646ab9

2003-04-27 13:25:21 +0000

[diff] [blame]

1158

def test_bug_725149(self):

1159

# mark_stack_base restoring before restoring marks

1160

self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),

1161

('a', None))

1162

self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),

1163

('a', None, None))

1164

Just van Rossum

12723ba

2003-07-02 20:03:04 +0000

[diff] [blame]

1165

def test_bug_764548(self):

1166

# bug 764548, re.compile() barfs on str/unicode subclasses

Guido van Rossum

ef87d6e

2007-05-02 19:09:54 +0000

[diff] [blame]

1167

class my_unicode(str): pass

Just van Rossum

12723ba

2003-07-02 20:03:04 +0000

[diff] [blame]

1168

pat = re.compile(my_unicode("abc"))

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1169

self.assertIsNone(pat.match("xyz"))

Just van Rossum

12723ba

2003-07-02 20:03:04 +0000

[diff] [blame]

1170

Skip Montanaro

5ba0054

2003-04-25 16:00:14 +0000

[diff] [blame]

1171

def test_finditer(self):

1172

iter = re.finditer(r":+", "a:b::c:::d")

1173

self.assertEqual([item.group(0) for item in iter],

1174

[":", "::", ":::"])

1175

Sean Reifschneider

7b3c975

2012-03-12 18:22:38 -0600

[diff] [blame]

1176

pat = re.compile(r":+")

1177

iter = pat.finditer("a:b::c:::d", 1, 10)

1178

self.assertEqual([item.group(0) for item in iter],

1179

[":", "::", ":::"])

1180

1181

pat = re.compile(r":+")

1182

iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)

1183

self.assertEqual([item.group(0) for item in iter],

1184

[":", "::", ":::"])

1185

1186

pat = re.compile(r":+")

1187

iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)

1188

self.assertEqual([item.group(0) for item in iter],

1189

[":", "::", ":::"])

1190

1191

pat = re.compile(r":+")

1192

iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)

1193

self.assertEqual([item.group(0) for item in iter],

1194

["::", "::"])

1195

Thomas Wouters

40a088d

2008-03-18 20:19:54 +0000

[diff] [blame]

1196

def test_bug_926075(self):

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1197

self.assertIsNot(re.compile('bug_926075'),

1198

re.compile(b'bug_926075'))

Hye-Shik Chang

9f62ecc

2004-04-20 21:30:07 +0000

[diff] [blame]

1199

Martin v. Löwis

7d9c6c7

2004-05-07 07:18:13 +0000

[diff] [blame]

1200

def test_bug_931848(self):

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

1201

pattern = "[\u002E\u3002\uFF0E\uFF61]"

Martin v. Löwis

7d9c6c7

2004-05-07 07:18:13 +0000

[diff] [blame]

1202

self.assertEqual(re.compile(pattern).split("a.b.c"),

1203

['a','b','c'])

1204

Gustavo Niemeyer

2004-09-03 18:11:59 +0000

[diff] [blame]

1205

def test_bug_581080(self):

1206

iter = re.finditer(r"\s", "a b")

Georg Brandl

a18af4e

2007-04-21 15:47:16 +0000

[diff] [blame]

1207

self.assertEqual(next(iter).span(), (1,2))

1208

self.assertRaises(StopIteration, next, iter)

Gustavo Niemeyer

2004-09-03 18:11:59 +0000

[diff] [blame]

1209

1210

scanner = re.compile(r"\s").scanner("a b")

1211

self.assertEqual(scanner.search().span(), (1, 2))

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1212

self.assertIsNone(scanner.search())

Gustavo Niemeyer

2004-09-03 18:11:59 +0000

[diff] [blame]

1213

1214

def test_bug_817234(self):

1215

iter = re.finditer(r".*", "asdf")

Georg Brandl

a18af4e

2007-04-21 15:47:16 +0000

[diff] [blame]

1216

self.assertEqual(next(iter).span(), (0, 4))

1217

self.assertEqual(next(iter).span(), (4, 4))

1218

self.assertRaises(StopIteration, next, iter)

Gustavo Niemeyer

2004-09-03 18:11:59 +0000

[diff] [blame]

1219

Mark Dickinson

1f26828

2009-07-28 17:22:36 +0000

[diff] [blame]

1220

def test_bug_6561(self):

1221

# '\d' should match characters in Unicode category 'Nd'

1222

# (Number, Decimal Digit), but not those in 'Nl' (Number,

1223

# Letter) or 'No' (Number, Other).

1224

decimal_digits = [

1225

'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'

1226

'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'

1227

'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'

1228

]

1229

for x in decimal_digits:

1230

self.assertEqual(re.match('^\d$', x).group(0), x)

1231

1232

not_decimal_digits = [

1233

'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'

1234

'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'

1235

'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'

1236

'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'

1237

]

1238

for x in not_decimal_digits:

1239

self.assertIsNone(re.match('^\d$', x))

1240

Guido van Rossum

d8faa36

2007-04-27 19:54:29 +0000

[diff] [blame]

1241

def test_empty_array(self):

1242

# SF buf 1647541

1243

import array

Guido van Rossum

166746c

2007-07-03 15:39:16 +0000

[diff] [blame]

1244

for typecode in 'bBuhHiIlLfd':

Guido van Rossum

d8faa36

2007-04-27 19:54:29 +0000

[diff] [blame]

1245

a = array.array(typecode)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1246

self.assertIsNone(re.compile(b"bla").match(a))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1247

self.assertEqual(re.compile(b"").match(a).groups(), ())

Gustavo Niemeyer

2004-09-03 18:11:59 +0000

[diff] [blame]

1248

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1249

def test_inline_flags(self):

1250

# Bug #1700

Serhiy Storchaka

ab14088

2014-11-11 21:13:28 +0200

[diff] [blame]

1251

upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below

1252

lower_char = '\u1ea1' # Latin Small Letter A with Dot Below

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1253

1254

p = re.compile(upper_char, re.I | re.U)

1255

q = p.match(lower_char)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1256

self.assertTrue(q)

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1257

1258

p = re.compile(lower_char, re.I | re.U)

1259

q = p.match(upper_char)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1260

self.assertTrue(q)

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1261

1262

p = re.compile('(?i)' + upper_char, re.U)

1263

q = p.match(lower_char)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1264

self.assertTrue(q)

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1265

1266

p = re.compile('(?i)' + lower_char, re.U)

1267

q = p.match(upper_char)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1268

self.assertTrue(q)

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1269

1270

p = re.compile('(?iu)' + upper_char)

1271

q = p.match(lower_char)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1272

self.assertTrue(q)

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1273

1274

p = re.compile('(?iu)' + lower_char)

1275

q = p.match(upper_char)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1276

self.assertTrue(q)

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1277

Serhiy Storchaka

cc66a65

2016-09-11 01:39:51 +0300

[diff] [blame]

1278

self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))

1279

self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))

1280

Christian Heimes

25bb783

2008-01-11 16:17:00 +0000

[diff] [blame]

1281

def test_dollar_matches_twice(self):

1282

"$ matches the end of string, and just before the terminating \n"

1283

pattern = re.compile('$')

1284

self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')

1285

self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')

1286

self.assertEqual(pattern.sub('#', '\n'), '#\n#')

1287

1288

pattern = re.compile('$', re.MULTILINE)

1289

self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )

1290

self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')

1291

self.assertEqual(pattern.sub('#', '\n'), '#\n#')

1292

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1293

def test_bytes_str_mixing(self):

1294

# Mixing str and bytes is disallowed

1295

pat = re.compile('.')

1296

bpat = re.compile(b'.')

1297

self.assertRaises(TypeError, pat.match, b'b')

1298

self.assertRaises(TypeError, bpat.match, 'b')

1299

self.assertRaises(TypeError, pat.sub, b'b', 'c')

1300

self.assertRaises(TypeError, pat.sub, 'b', b'c')

1301

self.assertRaises(TypeError, pat.sub, b'b', b'c')

1302

self.assertRaises(TypeError, bpat.sub, b'b', 'c')

1303

self.assertRaises(TypeError, bpat.sub, 'b', b'c')

1304

self.assertRaises(TypeError, bpat.sub, 'b', 'c')

1305

1306

def test_ascii_and_unicode_flag(self):

1307

# String patterns

1308

for flags in (0, re.UNICODE):

1309

pat = re.compile('\xc0', flags | re.IGNORECASE)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1310

self.assertTrue(pat.match('\xe0'))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1311

pat = re.compile('\w', flags)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1312

self.assertTrue(pat.match('\xe0'))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1313

pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1314

self.assertIsNone(pat.match('\xe0'))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1315

pat = re.compile('(?a)\xc0', re.IGNORECASE)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1316

self.assertIsNone(pat.match('\xe0'))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1317

pat = re.compile('\w', re.ASCII)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1318

self.assertIsNone(pat.match('\xe0'))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1319

pat = re.compile('(?a)\w')

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1320

self.assertIsNone(pat.match('\xe0'))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1321

# Bytes patterns

1322

for flags in (0, re.ASCII):

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

1323

pat = re.compile(b'\xc0', flags | re.IGNORECASE)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1324

self.assertIsNone(pat.match(b'\xe0'))

Serhiy Storchaka

2014-09-14 15:56:27 +0300

[diff] [blame]

1325

pat = re.compile(b'\w', flags)

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1326

self.assertIsNone(pat.match(b'\xe0'))

Antoine Pitrou

2008-08-19 17:56:33 +0000

[diff] [blame]

1327

# Incompatibilities

1328

self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)

1329

self.assertRaises(ValueError, re.compile, b'(?u)\w')

1330

self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)

1331

self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)

1332

self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)

1333

self.assertRaises(ValueError, re.compile, '(?au)\w')

1334

Serhiy Storchaka

2014-12-01 11:50:07 +0200

[diff] [blame]

1335

def test_locale_flag(self):
    # Exercises re.LOCALE on bytes patterns using a non-ASCII letter
    # taken from the current LC_CTYPE encoding, when one can be found.
    # Regex escapes (\w) are written in raw bytes literals; in a plain
    # literal they are invalid string escapes.
    import locale
    _, enc = locale.getlocale(locale.LC_CTYPE)
    # Search non-ASCII letter
    for i in range(128, 256):
        try:
            c = bytes([i]).decode(enc)
            sletter = c.lower()
            if sletter == c: continue
            bletter = sletter.encode(enc)
            if len(bletter) != 1: continue
            if bletter.decode(enc) != sletter: continue
            bpat = re.escape(bytes([i]))
            break
        except (UnicodeError, TypeError):
            # Byte not decodable in this encoding, or no encoding is set
            # (enc is None); try the next byte.
            pass
    else:
        # No suitable letter found: compile the patterns anyway, but
        # skip the match assertions (they are guarded by `if bletter`).
        bletter = None
        bpat = b'A'
    # Bytes patterns
    pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
    if bletter:
        self.assertTrue(pat.match(bletter))
    pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
    if bletter:
        self.assertTrue(pat.match(bletter))
    pat = re.compile(bpat, re.IGNORECASE)
    if bletter:
        self.assertIsNone(pat.match(bletter))
    pat = re.compile(br'\w', re.LOCALE)
    if bletter:
        self.assertTrue(pat.match(bletter))
    pat = re.compile(br'(?L)\w')
    if bletter:
        self.assertTrue(pat.match(bletter))
    pat = re.compile(br'\w')
    if bletter:
        self.assertIsNone(pat.match(bletter))
    # Incompatibilities
    self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
    self.assertWarns(DeprecationWarning, re.compile, '(?L)')
    self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
    self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
    self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
    self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')

1380

Ezio Melotti

b92ed7c

2010-03-06 15:24:08 +0000

[diff] [blame]

1381

def test_bug_6509(self):

1382

# Replacement strings of both types must parse properly.

1383

# all strings

1384

pat = re.compile('a(\w)')

1385

self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')

1386

pat = re.compile('a(.)')

1387

self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')

1388

pat = re.compile('..')

1389

self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')

1390

1391

# all bytes

1392

pat = re.compile(b'a(\w)')

1393

self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')

1394

pat = re.compile(b'a(.)')

1395

self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')

1396

pat = re.compile(b'..')

1397

self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')

1398

Antoine Pitrou

82feb1f

2010-01-14 17:34:48 +0000

[diff] [blame]

1399

def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both. For smaller values
    # a RuntimeError is raised instead of OverflowError.
    too_big_code = 2**128

    with self.assertRaises(TypeError):
        re.finditer("a", {})
    with self.assertRaises(OverflowError):
        _sre.compile("abc", 0, [too_big_code], 0, [], [])
    with self.assertRaises(TypeError):
        _sre.compile({}, 0, [], 0, [], [])

Christian Heimes

2008-01-03 23:01:04 +0000

[diff] [blame]

1412

Martin v. Löwis

d63a3b8

2011-09-28 07:41:54 +0200

[diff] [blame]

1413

def test_search_dot_unicode(self):

Serhiy Storchaka

2014-09-14 16:20:20 +0300

[diff] [blame]

1414

self.assertTrue(re.search("123.*-", '123abc-'))

1415

self.assertTrue(re.search("123.*-", '123\xe9-'))

1416

self.assertTrue(re.search("123.*-", '123\u20ac-'))

1417

self.assertTrue(re.search("123.*-", '123\U0010ffff-'))

1418

self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))

Martin v. Löwis

d63a3b8

2011-09-28 07:41:54 +0200

[diff] [blame]

1419

Ezio Melotti

df723e1

2012-03-13 01:29:48 +0200

[diff] [blame]

1420

def test_compile(self):
    # Test return value when given string and pattern as parameter
    compiled = re.compile('random pattern')
    self.assertIsInstance(compiled, re._pattern_type)
    # Compiling an already-compiled pattern hands back the same object.
    recompiled = re.compile(compiled)
    self.assertIsInstance(recompiled, re._pattern_type)
    self.assertIs(recompiled, compiled)
    # Test behaviour when not given a string or pattern as parameter
    with self.assertRaises(TypeError):
        re.compile(0)

1429

Ezio Melotti

fe8e6e7

2013-01-11 08:32:01 +0200

[diff] [blame]

1430

def test_bug_13899(self):
    # Issue #13899: re pattern r"[\A]" should work like "A" but matches
    # nothing. Ditto B and Z.
    with self.assertWarns(DeprecationWarning):
        found = re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ')
        self.assertEqual(found, ['A', 'B', '\b', 'C', 'Z'])

Ezio Melotti

fe8e6e7

2013-01-11 08:32:01 +0200

[diff] [blame]

1436

Antoine Pitrou

b33941a

2012-12-03 20:55:56 +0100

[diff] [blame]

1437

@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # `size` is supplied by the bigmemtest decorator (defined elsewhere).
    haystack = 'a' * size
    end_match = re.search('$', haystack)
    self.assertIsNotNone(end_match)
    for position in (end_match.start(), end_match.end()):
        self.assertEqual(position, size)

Antoine Pitrou

1f1888e

2012-12-03 20:53:12 +0100

[diff] [blame]

1445

Antoine Pitrou

2012-12-02 12:52:36 +0100

[diff] [blame]

1446

# The huge memuse is because of re.sub() using a list and a join()

1447

# to create the replacement result.

Antoine Pitrou

b33941a

2012-12-03 20:55:56 +0100

[diff] [blame]

1448

@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # `size` is supplied by the bigmemtest decorator (defined elsewhere).
    original = 'a' * size
    replaced, count = re.subn('', '', original)
    self.assertEqual(replaced, original)
    # One empty match before each character plus one at the end.
    self.assertEqual(count, size + 1)

1455

Serhiy Storchaka

c1b59d4

2012-12-29 23:38:48 +0200

[diff] [blame]

1456

def test_bug_16688(self):

1457

# Issue 16688: Backreferences make case-insensitive regex fail on

1458

# non-ASCII strings.

1459

self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])

1460

self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))

Antoine Pitrou

2012-12-02 12:52:36 +0100

[diff] [blame]

1461

Serhiy Storchaka

70ca021

2013-02-16 16:47:47 +0200

[diff] [blame]

1462

def test_repeat_minmax_overflow(self):

1463

# Issue #13169

1464

string = "x" * 100000

1465

self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))

1466

self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))

1467

self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))

1468

self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))

1469

self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))

1470

self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))

1471

# 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.

1472

self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)

1473

self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)

1474

self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)

1475

self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))

1476

1477

@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # Repeat counts right at the _sre.MAXREPEAT limit: MAXREPEAT - 1 is
    # accepted, MAXREPEAT itself overflows.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    text = "x" * 100000
    largest_ok = MAXREPEAT - 1

    self.assertIsNone(re.match(r".{%d}" % largest_ok, text))
    at_most = re.match(r".{,%d}" % largest_ok, text)
    self.assertEqual(at_most.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest_ok, text))

    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        pattern = template % MAXREPEAT
        self.assertRaises(OverflowError, re.compile, pattern)

1491

R David Murray

2013-04-14 13:00:54 -0400

[diff] [blame]

1492

def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # checkPatternError is defined elsewhere in the class; its last
    # argument is presumably the expected error position.
    expected_message = "bad character in group name '<foo>'"
    self.checkPatternError('(?P=<foo>)', expected_message, 4)

R David Murray

2013-04-14 13:00:54 -0400

[diff] [blame]

1496

1497

def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # checkPatternError is defined elsewhere in the class; its last
    # argument is presumably the expected error position.
    expected_message = "bad character in group name '?foo'"
    self.checkPatternError('(?P<?foo>)', expected_message, 4)

R David Murray

2013-04-14 13:00:54 -0400

[diff] [blame]

1501

Serhiy Storchaka

1f35ae0

2013-08-03 19:18:38 +0300

[diff] [blame]

1502

def test_issue17998(self):

1503

for reps in '*', '+', '?', '{1}':

1504

for mod in '', '?':

1505

pattern = '.' + reps + mod + 'yz'

1506

self.assertEqual(re.compile(pattern, re.S).findall('xyz'),

1507

['xyz'], msg=pattern)

1508

pattern = pattern.encode()

1509

self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),

1510

[b'xyz'], msg=pattern)

1511

Serhiy Storchaka

36af10c

2013-10-20 13:13:31 +0300

[diff] [blame]

1512

def test_match_repr(self):
    # repr() of a match object must show the match type's module and
    # qualified name, the span and the matched text.  S and B are the
    # str/bytes subclasses defined at the top of this file.
    for string in '[abracadabra]', S('[abracadabra]'):
        m = re.search(r'(.+)(.*?)\1', string)
        self.assertEqual(repr(m), "<%s.%s object; "
                                  "span=(1, 12), match='abracadabra'>" %
                                  (type(m).__module__, type(m).__qualname__))
    for string in (b'[abracadabra]', B(b'[abracadabra]'),
                   bytearray(b'[abracadabra]'),
                   memoryview(b'[abracadabra]')):
        m = re.search(rb'(.+)(.*?)\1', string)
        self.assertEqual(repr(m), "<%s.%s object; "
                                  "span=(1, 12), match=b'abracadabra'>" %
                                  (type(m).__module__, type(m).__qualname__))

    first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
    # Each expected repr is built from the type of the match it describes
    # (the original took the module name for `first` from `second`).
    self.assertEqual(repr(first), "<%s.%s object; "
                                  "span=(0, 2), match='aa'>" %
                                  (type(first).__module__, type(first).__qualname__))
    self.assertEqual(repr(second), "<%s.%s object; "
                                   "span=(3, 5), match='bb'>" %
                                   (type(second).__module__, type(second).__qualname__))

1533

Serhiy Storchaka

70ca021

2013-02-16 16:47:47 +0200

[diff] [blame]

1534

Serhiy Storchaka

98985a1

2013-08-19 23:18:23 +0300

[diff] [blame]

1535

def test_bug_2537(self):

1536

# issue 2537: empty submatches

1537

for outer_op in ('{0,}', '*', '+', '{1,187}'):

1538

for inner_op in ('{0,}', '*', '?'):

1539

r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))

1540

m = r.match("xyyzy")

1541

self.assertEqual(m.group(0), "xyy")

1542

self.assertEqual(m.group(1), "")

1543

self.assertEqual(m.group(2), "y")

1544

Antoine Pitrou

2014-02-03 20:59:59 +0100

[diff] [blame]

1545

def test_debug_flag(self):

Serhiy Storchaka

2014-09-21 22:47:55 +0300

[diff] [blame]

1546

pat = r'(\.)(?:[ch]|py)(?(1)$|: )'

Antoine Pitrou

2014-02-03 20:59:59 +0100

[diff] [blame]

1547

with captured_stdout() as out:

Serhiy Storchaka

2014-09-21 22:47:55 +0300

[diff] [blame]

1548

re.compile(pat, re.DEBUG)

1549

dump = '''\

Serhiy Storchaka

c7f7d38

2014-11-09 20:48:36 +0200

[diff] [blame]

SUBPATTERN 1

LITERAL 46

SUBPATTERN None

BRANCH

IN

LITERAL 99

LITERAL 104

OR

LITERAL 112

LITERAL 121

SUBPATTERN None

GROUPREF_EXISTS 1

AT AT_END

ELSE

LITERAL 58

LITERAL 32

Serhiy Storchaka

2014-09-21 22:47:55 +0300

[diff] [blame]

1566

'''

1567

self.assertEqual(out.getvalue(), dump)

Antoine Pitrou

2014-02-03 20:59:59 +0100

[diff] [blame]

1568

# Debug output is output again even a second time (bypassing

1569

# the cache -- issue #20426).

1570

with captured_stdout() as out:

Serhiy Storchaka

2014-09-21 22:47:55 +0300

[diff] [blame]

1571

re.compile(pat, re.DEBUG)

1572

self.assertEqual(out.getvalue(), dump)

Antoine Pitrou

2014-02-03 20:59:59 +0100

[diff] [blame]

1573

Serhiy Storchaka

ccdf352

2014-03-06 11:28:32 +0200

[diff] [blame]

1574

def test_keyword_parameters(self):

1575

# Issue #20283: Accepting the string keyword parameter.

1576

pat = re.compile(r'(ab)')

1577

self.assertEqual(

1578

pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))

1579

self.assertEqual(

Serhiy Storchaka

a537eb4

2014-03-06 11:36:15 +0200

[diff] [blame]

1580

pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))

1581

self.assertEqual(

Serhiy Storchaka

ccdf352

2014-03-06 11:28:32 +0200

[diff] [blame]

1582

pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))

1583

self.assertEqual(

1584

pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])

1585

self.assertEqual(

1586

pat.split(string='abracadabra', maxsplit=1),

1587

['', 'ab', 'racadabra'])

1588

self.assertEqual(

1589

pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),

1590

(7, 9))

1591

Serhiy Storchaka

429b59e

2014-05-14 21:48:17 +0300

[diff] [blame]

1592

def test_bug_20998(self):

1593

# Issue #20998: Fullmatch of repeated single character pattern

1594

# with ignore case.

1595

self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))

1596

Serhiy Storchaka

4659cc0

2014-10-31 00:53:49 +0200

[diff] [blame]

1597

def test_locale_caching(self):
    # Issue #22410
    # Remember the current LC_CTYPE setting and restore it on cleanup.
    saved_ctype = locale.setlocale(locale.LC_CTYPE)
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved_ctype)

    # Both locales have to be installable, otherwise the test is skipped.
    required_locales = ('en_US.iso88591', 'en_US.utf8')
    for wanted in required_locales:
        try:
            locale.setlocale(locale.LC_CTYPE, wanted)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % wanted)

    # First round: ISO-8859-1 checks, then UTF-8 checks.
    re.purge()
    self.check_en_US_iso88591()
    self.check_en_US_utf8()

    # Second round: same checks in the opposite order, after purging again.
    re.purge()
    self.check_en_US_utf8()
    self.check_en_US_iso88591()

1614

1615

def check_en_US_iso88591(self):
    # Switch LC_CTYPE to en_US.iso88591, then require that bytes 0xC5 and
    # 0xE5 match each other under LOCALE|IGNORECASE.
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')

    # Flags passed as an argument.
    flag_cases = (
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    )
    for pattern, subject in flag_cases:
        self.assertTrue(re.match(pattern, subject, re.L|re.I))

    # Same flags given inline in the pattern.
    inline_cases = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5'),
        (b'(?Li)\xc5', b'\xe5'),
        (b'(?Li)\xe5', b'\xc5'),
    )
    for pattern, subject in inline_cases:
        self.assertTrue(re.match(pattern, subject))

1623

1624

def check_en_US_utf8(self):
    # Switch LC_CTYPE to en_US.utf8, then require that bytes 0xC5 and 0xE5
    # match themselves but not each other under LOCALE|IGNORECASE.
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')

    # Flags passed as an argument.
    self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
    for pattern, subject in ((b'\xc5', b'\xe5'), (b'\xe5', b'\xc5')):
        self.assertIsNone(re.match(pattern, subject, re.L|re.I))

    # Same flags given inline in the pattern.
    self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
    for pattern, subject in ((b'(?Li)\xc5', b'\xe5'), (b'(?Li)\xe5', b'\xc5')):
        self.assertIsNone(re.match(pattern, subject))

1632

Serhiy Storchaka

ad446d5

2014-11-10 13:49:00 +0200

[diff] [blame]

1633

def test_error(self):

1634

with self.assertRaises(re.error) as cm:

1635

re.compile('(\u20ac))')

1636

err = cm.exception

1637

self.assertIsInstance(err.pattern, str)

1638

self.assertEqual(err.pattern, '(\u20ac))')

1639

self.assertEqual(err.pos, 3)

1640

self.assertEqual(err.lineno, 1)

1641

self.assertEqual(err.colno, 4)

1642

self.assertIn(err.msg, str(err))

1643

self.assertIn(' at position 3', str(err))

1644

self.assertNotIn(' at position 3', err.msg)

1645

# Bytes pattern

1646

with self.assertRaises(re.error) as cm:

1647

re.compile(b'(\xa4))')

1648

err = cm.exception

1649

self.assertIsInstance(err.pattern, bytes)

1650

self.assertEqual(err.pattern, b'(\xa4))')

1651

self.assertEqual(err.pos, 3)

1652

# Multiline pattern

1653

with self.assertRaises(re.error) as cm:

re.compile("""

(

abc

)

)

(

""", re.VERBOSE)

err = cm.exception

self.assertEqual(err.pos, 77)

1663

self.assertEqual(err.lineno, 5)

1664

self.assertEqual(err.colno, 17)

1665

self.assertIn(err.msg, str(err))

1666

self.assertIn(' at position 77', str(err))

1667

self.assertIn('(line 5, column 17)', str(err))

1668

Serhiy Storchaka

2015-03-25 21:03:47 +0200

[diff] [blame]

1669

def test_misc_errors(self):
    # Each row is (pattern, expected message, expected position) and is
    # handed to checkPatternError in the order listed.
    cases = (
        (r'(', 'missing ), unterminated subpattern', 0),
        (r'((a|b)', 'missing ), unterminated subpattern', 0),
        (r'(a|b))', 'unbalanced parenthesis', 5),
        (r'(?P', 'unexpected end of pattern', 3),
        (r'(?z)', 'unknown extension ?z', 1),
        (r'(?iz)', 'unknown flag', 3),
        (r'(?i', 'missing )', 3),
        (r'(?#abc', 'missing ), unterminated comment', 0),
        (r'(?<', 'unexpected end of pattern', 3),
        (r'(?<>)', 'unknown extension ?<>', 1),
        (r'(?', 'unexpected end of pattern', 2),
    )
    for pattern, message, position in cases:
        self.checkPatternError(pattern, message, position)

1681

Antoine Pitrou

2013-10-25 21:36:10 +0200

[diff] [blame]

1682

Serhiy Storchaka

5c24d0e

2013-11-23 22:42:43 +0200

[diff] [blame]

1683

class PatternReprTests(unittest.TestCase):
    # Tests for the repr() of compiled pattern objects.

    def check(self, pattern, expected):
        # Compile without flags and compare the repr with `expected`.
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        # Compile with `flags` and compare the repr with `expected`.
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        expected = "re.compile('random pattern')"
        self.check('random pattern', expected)

    def test_single_flag(self):
        expected = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, expected)

    def test_multiple_flags(self):
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', re.I|re.S|re.X, expected)

    def test_unicode_flag(self):
        # re.U is expected to be left out of the repr.
        self.check_flags('random pattern', re.U,
                         "re.compile('random pattern')")
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL)")
        self.check_flags('random pattern', re.I|re.S|re.U, expected)

    def test_inline_flags(self):
        # A flag set inline in the pattern is expected to show in the repr.
        expected = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', expected)

    def test_unknown_flags(self):
        # Bits without a flag name are expected to be shown in hex.
        self.check_flags('random pattern', 0x123000,
                         "re.compile('random pattern', 0x123000)")
        self.check_flags('random pattern', 0x123000|re.I,
                         "re.compile('random pattern', re.IGNORECASE|0x123000)")

    def test_bytes(self):
        self.check(b'bytes pattern',
                   "re.compile(b'bytes pattern')")
        self.check_flags(b'bytes pattern', re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_locale(self):
        expected = "re.compile(b'bytes pattern', re.LOCALE)"
        self.check_flags(b'bytes pattern', re.L, expected)

    def test_quotes(self):
        self.check('random "double quoted" pattern',
                   '''re.compile('random "double quoted" pattern')''')
        self.check("random 'single quoted' pattern",
                   '''re.compile("random 'single quoted' pattern")''')
        self.check('''both 'single' and "double" quotes''',
                   '''re.compile('both \\'single\\' and "double" quotes')''')

    def test_long_pattern(self):
        # The repr of a very long pattern must stay under 300 characters
        # while keeping the start of the pattern and the trailing flags.
        pattern = 'Very %spattern' % ('long ' * 1000)
        expected_head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], expected_head)

        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], expected_head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")

1747

1748

Antoine Pitrou

2013-10-25 21:36:10 +0200

[diff] [blame]

1749

class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        # Each row pairs an input string with the table that
        # sre_compile._generate_overlap_table must return for it.
        build_table = sre_compile._generate_overlap_table
        cases = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, expected in cases:
            self.assertEqual(build_table(prefix), expected)

1762

1763

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1764

class ExternalTests(unittest.TestCase):

Guido van Rossum

8e0ce30

1997-07-11 19:34:44 +0000

[diff] [blame]

1765

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1766

def test_re_benchmarks(self):

1767

're_tests benchmarks'

1768

from test.re_tests import benchmarks

1769

for pattern, s in benchmarks:

1770

with self.subTest(pattern=pattern, string=s):

1771

p = re.compile(pattern)

1772

self.assertTrue(p.search(s))

1773

self.assertTrue(p.match(s))

1774

self.assertTrue(p.fullmatch(s))

1775

s2 = ' '*10000 + s + ' '*10000

1776

self.assertTrue(p.search(s2))

1777

self.assertTrue(p.match(s2, 10000))

1778

self.assertTrue(p.match(s2, 10000, 10000 + len(s)))

1779

self.assertTrue(p.fullmatch(s2, 10000, 10000 + len(s)))

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1780

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1781

def test_re_tests(self):

1782

're_tests test suite'

1783

from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR

1784

for t in tests:

1785

pattern = s = outcome = repl = expected = None

1786

if len(t) == 5:

1787

pattern, s, outcome, repl, expected = t

1788

elif len(t) == 3:

1789

pattern, s, outcome = t

Guido van Rossum

41360a4

1998-03-26 19:42:58 +0000

[diff] [blame]

1790

else:

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1791

raise ValueError('Test tuples should have 3 or 5 fields', t)

1792

1793

with self.subTest(pattern=pattern, string=s):

1794

if outcome == SYNTAX_ERROR: # Expected a syntax error

1795

with self.assertRaises(re.error):

re.compile(pattern)

continue

obj = re.compile(pattern)

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1800

result = obj.search(s)

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1801

if outcome == FAIL:

1802

self.assertIsNone(result, 'Succeeded incorrectly')

continue

with self.subTest():

self.assertTrue(result, 'Failed incorrectly')

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1807

# Matched, as expected, so now we compute the

1808

# result string and compare it to our expected result.

1809

start, end = result.span(0)

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1810

vardict = {'found': result.group(0),

1811

'groups': result.group(),

1812

'flags': result.re.flags}

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1813

for i in range(1, 100):

1814

try:

1815

gi = result.group(i)

1816

# Special hack because else the string concat fails:

if gi is None:

gi = "None"

except IndexError:

gi = "Error"

vardict['g%d' % i] = gi

1822

for i in result.re.groupindex.keys():

try:

gi = result.group(i)

if gi is None:

gi = "None"

except IndexError:

gi = "Error"

vardict[i] = gi

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1830

self.assertEqual(eval(repl, vardict), expected,

1831

'grouping error')

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1832

Antoine Pitrou

22628c4

2008-07-22 17:53:22 +0000

[diff] [blame]

1833

# Try the match with both pattern and string converted to

1834

# bytes, and check that it still succeeds.

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1835

try:

Antoine Pitrou

22628c4

2008-07-22 17:53:22 +0000

[diff] [blame]

1836

bpat = bytes(pattern, "ascii")

1837

bs = bytes(s, "ascii")

1838

except UnicodeEncodeError:

1839

# skip non-ascii tests

1840

pass

1841

else:

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1842

with self.subTest('bytes pattern match'):

Serhiy Storchaka

2014-12-01 11:50:07 +0200

[diff] [blame]

1843

obj = re.compile(bpat)

1844

self.assertTrue(obj.search(bs))

1845

1846

# Try the match with LOCALE enabled, and check that it

1847

# still succeeds.

1848

with self.subTest('locale-sensitive match'):

1849

obj = re.compile(bpat, re.LOCALE)

1850

result = obj.search(bs)

1851

if result is None:

1852

print('=== Fails on locale-sensitive match', t)

Fredrik Lundh

8e6d571

2000-08-08 17:06:53 +0000

[diff] [blame]

1853

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1854

# Try the match with the search area limited to the extent

1855

# of the match and see if it still succeeds. \B will

1856

# break (because it won't match at the end or start of a

1857

# string), so we'll ignore patterns that feature it.

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1858

if (pattern[:2] != r'\B' and pattern[-2:] != r'\B'

1859

and result is not None):

1860

with self.subTest('range-limited match'):

1861

obj = re.compile(pattern)

1862

self.assertTrue(obj.search(s, start, end + 1))

Fredrik Lundh

1151a8c

2000-08-08 16:47:42 +0000

[diff] [blame]

1863

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1864

# Try the match with IGNORECASE enabled, and check that it

1865

# still succeeds.

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1866

with self.subTest('case-insensitive match'):

1867

obj = re.compile(pattern, re.IGNORECASE)

1868

self.assertTrue(obj.search(s))

Guido van Rossum

dfa6790

1997-12-08 17:12:06 +0000

[diff] [blame]

1869

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1870

# Try the match with UNICODE locale enabled, and check

1871

# that it still succeeds.

Serhiy Storchaka

2014-12-01 11:06:45 +0200

[diff] [blame]

1872

with self.subTest('unicode-sensitive match'):

1873

obj = re.compile(pattern, re.UNICODE)

1874

self.assertTrue(obj.search(s))

Fredrik Lundh

8e6d571

2000-08-08 17:06:53 +0000

[diff] [blame]

1875

Gregory P. Smith

5a63183

2010-07-27 05:31:29 +0000

[diff] [blame]

1876

Skip Montanaro

2003-04-24 19:43:18 +0000

[diff] [blame]

1877

if __name__ == "__main__":

Serhiy Storchaka