Blame - Lib/test/test_difflib.py - platform/external/python/cpython3

2003-07-01 14:59:46 +0000

[diff] [blame]

3

import difflib
import doctest
import sys
import unittest
from test.support import findfile

Neal Norwitz

2003-07-01 14:59:46 +0000

[diff] [blame]

6

Neal Norwitz

2003-07-01 14:59:46 +0000

[diff] [blame]

7

Terry Reedy

2010-11-25 06:12:34 +0000

[diff] [blame]

8

class TestWithAscii(unittest.TestCase):
    # SequenceMatcher checks on short ASCII strings: similarity ratio,
    # opcodes, and the bpopular / bjunk bookkeeping sets.

    def test_one_insert(self):
        # One extra character at the very start of the second sequence.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
            [   ('insert', 0, 0, 0, 1),
                ('equal', 0, 100, 1, 101)])
        self.assertEqual(sm.bpopular, set())

        # Same insertion, but in the middle of the second sequence.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
            [   ('equal', 0, 50, 0, 50),
                ('insert', 50, 50, 50, 51),
                ('equal', 50, 100, 51, 101)])
        self.assertEqual(sm.bpopular, set())

    def test_one_delete(self):
        # The single 'c' in the first sequence is missing from the second.
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(list(sm.get_opcodes()),
            [   ('equal', 0, 40, 0, 40),
                ('delete', 40, 41, 40, 40),
                ('equal', 41, 81, 40, 80)])

    def test_bjunk(self):
        # bjunk holds only the junk elements that actually occur in b.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
        self.assertEqual(sm.bjunk, set())

        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' '})

        sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
                a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' ', 'b'})

44

Terry Reedy

2010-11-25 06:12:34 +0000

[diff] [blame]

45

46

class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        # By default autojunk=True and the heuristic kicks in for a sequence
        # of length 200+
        seq1 = 'b' * 200
        seq2 = 'a' + 'b' * 200

        # With the heuristic on, 'b' is treated as popular and nothing matches.
        sm = difflib.SequenceMatcher(None, seq1, seq2)
        self.assertAlmostEqual(sm.ratio(), 0, places=3)
        self.assertEqual(sm.bpopular, {'b'})

        # Now turn the heuristic off
        sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
        self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
        self.assertEqual(sm.bpopular, set())

Terry Reedy

2010-11-25 06:12:34 +0000

[diff] [blame]

62

63

64

class TestSFbugs(unittest.TestCase):
    # Regression tests, each tied to the tracker issue named in its comment.

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_matching_blocks_cache(self):
        # Issue #21635
        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
        # The first call is made only so that the second call is a repeat
        # on the same matcher; the assertions inspect the second result.
        first = s.get_matching_blocks()
        second = s.get_matching_blocks()
        self.assertEqual(second[0].size, 2)
        self.assertEqual(second[1].size, 2)
        self.assertEqual(second[2].size, 0)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])
        # The hint keeps the original leading tab, pads the ten matched
        # characters with spaces, and marks the two deleted ones ("gy").
        self.assertEqual("? \t          --\n", diff[1])
        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])

    def test_hint_indented_properly_with_tabs(self):
        diff = list(difflib.Differ().compare(["\t \t \t^"], ["\t \t \t^\n"]))
        self.assertEqual("- \t \t \t^", diff[0])
        self.assertEqual("+ \t \t \t^\n", diff[1])
        self.assertEqual("? \t \t \t +\n", diff[2])

    def test_mdiff_catch_stop_iteration(self):
        # Issue #33224
        self.assertEqual(
            list(difflib._mdiff(["2"], ["3"], 1)),
            [((1, '\x00-2\x01'), (1, '\x00+3\x01'), True)],
        )

Martin v. Löwis

2004-08-29 16:34:40 +0000

[diff] [blame]

110

# Input fixtures for the HTML diff tests (SF patch 914575).
#
# NOTE(review): the text of each line is reproduced exactly as it appears in
# this copy of the file, but runs of spaces and leading/trailing whitespace
# look collapsed. The "[sstt]"-style markers inside the *_from2 / *_to2
# fixtures appear to describe the intended whitespace (s = space, t = tab)
# and do not all agree with the visible text. Verify these literals against
# test_difflib_expect.html before relying on them.

patch914575_from1 = """
1. Beautiful is beTTer than ugly.
2. Explicit is better than implicit.
3. Simple is better than complex.
4. Complex is better than complicated.
"""

patch914575_to1 = """
1. Beautiful is better than ugly.
3. Simple is better than complex.
4. Complicated is better than complex.
5. Flat is better than nested.
"""

patch914575_nonascii_from1 = """
1. Beautiful is beTTer than ugly.
2. Explicit is better than ımplıcıt.
3. Simple is better than complex.
4. Complex is better than complicated.
"""

patch914575_nonascii_to1 = """
1. Beautiful is better than ügly.
3. Sımple is better than complex.
4. Complicated is better than cömplex.
5. Flat is better than nested.
"""

patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
\t\tLine 2: preceded by from:[sstt] to:[sssst]
\t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4: \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
Line 1: preceded by from:[tt] to:[ssss]
\tLine 2: preceded by from:[sstt] to:[sssst]
Line 3: preceded by from:[sstst] to:[ssssss]
Line 4: has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4 changed
line 5 changed
line 6 changed
line 7
line 8 subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2 added
line 3
line 4 chanGEd
line 5a chanGed
line 6a changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""

187

188

class TestSFpatches(unittest.TestCase):
    # Tests for HtmlDiff output and for SequenceMatcher on very long inputs.

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        f1a = ((patch914575_from1 + '123\n'*10)*3)
        t1a = (patch914575_to1 + '123\n'*10)*3
        f1b = '456\n'*10 + f1a
        t1b = '456\n'*10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        # One full HTML page, plus a series of labelled tables that are
        # spliced in just before its closing </body> tag.
        full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
        tables = '\n'.join(
            [
             '<h2>Context (first diff within numlines=5(default))</h2>',
             i.make_table(f1a,t1a,'from','to',context=True),
             '<h2>Context (first diff after numlines=5(default))</h2>',
             i.make_table(f1b,t1b,'from','to',context=True),
             '<h2>Context (numlines=6)</h2>',
             i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
             '<h2>Context (numlines=0)</h2>',
             i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
             '<h2>Same Context</h2>',
             i.make_table(f1a,f1a,'from','to',context=True),
             '<h2>Same Full</h2>',
             i.make_table(f1a,f1a,'from','to',context=False),
             '<h2>Empty Context</h2>',
             i.make_table([],[],'from','to',context=True),
             '<h2>Empty Full</h2>',
             i.make_table([],[],'from','to',context=False),
             '<h2>tabsize=2</h2>',
             j.make_table(f2,t2),
             '<h2>tabsize=default</h2>',
             i.make_table(f2,t2),
             '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
             '<h2>wrapcolumn=14,splitlines()</h2>',
             k.make_table(f3.splitlines(),t3.splitlines()),
             '<h2>wrapcolumn=14,splitlines(True)</h2>',
             k.make_table(f3.splitlines(True),t3.splitlines(True)),
             ])
        actual = full.replace('</body>','\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w', encoding='utf-8') as fp:
        #    fp.write(actual)

        # Read with an explicit encoding so the comparison does not depend
        # on the locale's default encoding.
        with open(findfile('test_difflib_expect.html'), encoding='utf-8') as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        # The inputs are twice as long as the recursion limit; odd-indexed
        # entries are identical in both lists, even-indexed ones differ.
        limit = sys.getrecursionlimit()
        old = ["K:%d" % i if i % 2 else "V:A:%d" % i for i in range(limit*2)]
        new = ["K:%d" % i if i % 2 else "V:B:%d" % i for i in range(limit*2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()

    def test_make_file_default_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines())
        self.assertIn('content="text/html; charset=utf-8"', output)

    def test_make_file_iso88591_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines(),
                                     charset='iso-8859-1')
        self.assertIn('content="text/html; charset=iso-8859-1"', output)

    def test_make_file_usascii_charset_with_nonascii_input(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
                                     patch914575_nonascii_to1.splitlines(),
                                     charset='us-ascii')
        self.assertIn('content="text/html; charset=us-ascii"', output)
        # The non-ASCII input must appear as character references.
        self.assertIn('&#305;mpl&#305;c&#305;t', output)

274

Gustavo Niemeyer

54814881

2006-01-31 18:34:13 +0000

[diff] [blame]

275

R. David Murray

b2416e5

2010-04-12 16:58:02 +0000

[diff] [blame]

276

class TestOutputFormat(unittest.TestCase):
    # Checks on the header lines and range fields of unified/context diffs.

    def test_tab_delimiter(self):
        # File name and file date are separated by a tab in both formats.
        args = ['one', 'two', 'Original', 'Current',
            '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        # Without file dates the header lines carry no trailing tab.
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        # (the string below is kept only as in-line documentation)
        spec = '''\
           Each <range> field shall be of the form:
             "%1d", <beginning line number> if the range contains exactly one line,
           and:
            "%1d,%1d", <beginning line number>, <number of lines> otherwise.
           If a range is empty, its beginning line number shall be the number of
           the line just before the range, or 0 if the empty range starts the file.
        '''
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3,3), '3,0')
        self.assertEqual(fmt(3,4), '4')
        self.assertEqual(fmt(3,5), '4,2')
        self.assertEqual(fmt(3,6), '4,3')
        self.assertEqual(fmt(0,0), '0,0')

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        # (the string below is kept only as in-line documentation)
        spec = '''\
           The range of lines in file1 shall be written in the following format
           if the range contains two or more lines:
               "*** %d,%d ****\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "*** %d ****\n", <ending line number>
           The ending line number of an empty range shall be the number of the preceding line,
           or 0 if the range is at the start of the file.

           Next, the range of lines in file2 shall be written in the following format
           if the range contains two or more lines:
               "--- %d,%d ----\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "--- %d ----\n", <ending line number>
        '''
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3,3), '3')
        self.assertEqual(fmt(3,4), '4')
        self.assertEqual(fmt(3,5), '4,5')
        self.assertEqual(fmt(3,6), '4,6')
        self.assertEqual(fmt(0,0), '0')

337

338

Greg Ward

4d9d256

2015-04-20 20:21:21 -0400

[diff] [blame]

339

class TestBytes(unittest.TestCase):
    # don't really care about the content of the output, just the fact
    # that it's bytes and we don't crash
    def check(self, diff):
        diff = list(diff)   # trigger exceptions first
        for line in diff:
            self.assertIsInstance(
                line, bytes,
                "all lines of diff should be bytes, but got: %r" % line)

    def test_byte_content(self):
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes

        unified = difflib.unified_diff
        context = difflib.context_diff

        check = self.check
        check(difflib.diff_bytes(unified, a, a))
        check(difflib.diff_bytes(unified, a, b))

        # now with filenames (content and filenames are all bytes!)
        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))

        # and with filenames and dates
        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))

        # same all over again, with context diff
        check(difflib.diff_bytes(context, a, a))
        check(difflib.diff_bytes(context, a, b))
        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))

    def test_byte_filenames(self):
        # somebody renamed a file from ISO-8859-2 to UTF-8
        fna = b'\xb3odz.txt'    # "łodz.txt"
        fnb = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        a = [b'\xa3odz is a city in Poland.']
        b = [b'\xc5\x81odz is a city in Poland.']

        check = self.check
        unified = difflib.unified_diff
        context = difflib.context_diff
        check(difflib.diff_bytes(unified, a, b, fna, fnb))
        check(difflib.diff_bytes(context, a, b, fna, fnb))

        def assertDiff(expect, actual):
            # do not compare expect and actual as lists, because unittest
            # uses difflib to report difference between lists
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for e, a in zip(expect, actual):
                self.assertEqual(e, a)

        expect = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
        assertDiff(expect, actual)

        # with dates (plain ASCII)
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))

        expect = [
            # note the mixed encodings here: this is deeply wrong by every
            # tenet of Unicode, but it doesn't crash, it's parseable by
            # patch, and it's how UNIX(tm) diff behaves
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
                                    lineterm=b'')
        assertDiff(expect, actual)

    def test_mixed_types_content(self):
        # type of input content must be consistent: all str or all bytes
        a = [b'hello']
        b = ['hello']

        unified = difflib.unified_diff
        context = difflib.context_diff

        expect = "lines to compare must be str, not bytes (b'hello')"
        self._assert_type_error(expect, unified, a, b)
        self._assert_type_error(expect, unified, b, a)
        self._assert_type_error(expect, context, a, b)
        self._assert_type_error(expect, context, b, a)

        expect = "all arguments must be bytes, not str ('hello')"
        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)

    def test_mixed_types_filenames(self):
        # cannot pass filenames as bytes if content is str (this may not be
        # the right behaviour, but at least the test demonstrates how
        # things work)
        a = ['hello\n']
        b = ['ohell\n']
        fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
        fnb = b'ol\xc3\xa9.txt' # to UTF-8
        self._assert_type_error(
            "all arguments must be str, not: b'ol\\xe9.txt'",
            difflib.unified_diff, a, b, fna, fnb)

    def test_mixed_types_dates(self):
        # type of dates must be consistent with type of contents
        a = [b'foo\n']
        b = [b'bar\n']
        datea = '1 fév'
        dateb = '3 fév'
        self._assert_type_error(
            "all arguments must be bytes, not str ('1 fév')",
            difflib.diff_bytes, difflib.unified_diff,
            a, b, b'a', b'b', datea, dateb)

        # if input is str, non-ASCII dates are fine
        a = ['foo\n']
        b = ['bar\n']
        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))

    def _assert_type_error(self, msg, generator, *args):
        # Exhaust generator(*args) and require a TypeError whose text is
        # exactly msg; list() forces lazily raised errors to surface.
        with self.assertRaises(TypeError) as ctx:
            list(generator(*args))
        self.assertEqual(msg, str(ctx.exception))

482

Jamie Davis

0e6c8ee

2018-03-04 00:33:32 -0500

[diff] [blame]

483

class TestJunkAPIs(unittest.TestCase):
    # Checks for the module-level junk predicates IS_LINE_JUNK and
    # IS_CHARACTER_JUNK.

    def test_is_line_junk_true(self):
        for line in ['#', ' ', ' #', '# ', ' # ', '']:
            self.assertTrue(difflib.IS_LINE_JUNK(line), repr(line))

    def test_is_line_junk_false(self):
        for line in ['##', ' ##', '## ', 'abc ', 'abc #', 'Mr. Moose is up!']:
            self.assertFalse(difflib.IS_LINE_JUNK(line), repr(line))

    def test_is_line_junk_REDOS(self):
        # A very long run of whitespace followed by '##' must still be
        # classified (as not junk) without the call hanging.
        evil_input = ('\t' * 1000000) + '##'
        self.assertFalse(difflib.IS_LINE_JUNK(evil_input))

    def test_is_character_junk_true(self):
        for char in [' ', '\t']:
            self.assertTrue(difflib.IS_CHARACTER_JUNK(char), repr(char))

    def test_is_character_junk_false(self):
        for char in ['a', '#', '\n', '\f', '\r', '\v']:
            self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))

Greg Ward

4d9d256

2015-04-20 20:21:21 -0400

[diff] [blame]

503

Thomas Wouters

49fd7fa

2006-04-21 10:40:58 +0000

[diff] [blame]

504

def test_main():

505

difflib.HtmlDiff._default_prefix = 0

506

Doctests = doctest.DocTestSuite(difflib)

Terry Reedy