blob: 5e2ca1a23b928ba41e467896da341c83993a9906 [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001import difflib
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test.support import run_unittest, findfile
Neal Norwitze7dfe212003-07-01 14:59:46 +00003import unittest
Raymond Hettinger43d790c2003-07-16 04:34:56 +00004import doctest
Gustavo Niemeyer548148812006-01-31 18:34:13 +00005import sys
Neal Norwitze7dfe212003-07-01 14:59:46 +00006
Neal Norwitze7dfe212003-07-01 14:59:46 +00007
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher checks on short, plain-ASCII strings."""

    def test_one_insert(self):
        """One extra character in b: at the very front, then in the middle."""
        base = 'b' * 100

        # Insertion at position 0 of b.
        matcher = difflib.SequenceMatcher(None, base, 'a' + base)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        front_opcodes = [
            ('insert', 0, 0, 0, 1),
            ('equal', 0, 100, 1, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), front_opcodes)
        self.assertEqual(matcher.bpopular, set())

        # Insertion half way through b.
        half = 'b' * 50
        matcher = difflib.SequenceMatcher(None, base, half + 'a' + half)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        middle_opcodes = [
            ('equal', 0, 50, 0, 50),
            ('insert', 50, 50, 50, 51),
            ('equal', 50, 100, 51, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), middle_opcodes)
        self.assertEqual(matcher.bpopular, set())

    def test_one_delete(self):
        """A single 'c' present in a but missing from b."""
        head = 'a' * 40
        tail = 'b' * 40
        matcher = difflib.SequenceMatcher(None, head + 'c' + tail, head + tail)
        self.assertAlmostEqual(matcher.ratio(), 0.994, places=3)
        expected_opcodes = [
            ('equal', 0, 40, 0, 40),
            ('delete', 40, 41, 40, 40),
            ('equal', 41, 81, 40, 80),
        ]
        self.assertEqual(list(matcher.get_opcodes()), expected_opcodes)

    def test_bjunk(self):
        """bjunk holds exactly the elements of b that isjunk flags."""

        def is_space(ch):
            return ch == ' '

        def is_space_or_b(ch):
            return ch in [' ', 'b']

        seq_a = 'a' * 40 + 'b' * 40
        seq_b = 'a' * 44 + 'b' * 40
        seq_b_padded = seq_b + ' ' * 20

        # b contains no junk element at all.
        matcher = difflib.SequenceMatcher(isjunk=is_space, a=seq_a, b=seq_b)
        self.assertEqual(matcher.bjunk, set())

        # b ends with spaces, which the predicate flags.
        matcher = difflib.SequenceMatcher(isjunk=is_space,
                                          a=seq_a, b=seq_b_padded)
        self.assertEqual(matcher.bjunk, {' '})

        # Both ' ' and 'b' are flagged and both occur in b.
        matcher = difflib.SequenceMatcher(isjunk=is_space_or_b,
                                          a=seq_a, b=seq_b_padded)
        self.assertEqual(matcher.bjunk, {' ', 'b'})
44
Terry Reedy99f96372010-11-25 06:12:34 +000045
class TestAutojunk(unittest.TestCase):
    """Checks for SequenceMatcher's autojunk switch (added in 2.7)."""

    def test_one_insert_homogenous_sequence(self):
        """Compare a 200-element run of 'b' with and without autojunk."""
        original = 'b' * 200
        modified = 'a' + original

        # autojunk defaults to True and the heuristic kicks in for a
        # sequence of length 200+, so 'b' is treated as popular.
        with_heuristic = difflib.SequenceMatcher(None, original, modified)
        self.assertAlmostEqual(with_heuristic.ratio(), 0, places=3)
        self.assertEqual(with_heuristic.bpopular, {'b'})

        # With the heuristic switched off nothing is classed as popular.
        without_heuristic = difflib.SequenceMatcher(
            None, original, modified, autojunk=False)
        self.assertAlmostEqual(without_heuristic.ratio(), 0.9975, places=3)
        self.assertEqual(without_heuristic.bpopular, set())
Terry Reedy99f96372010-11-25 06:12:34 +000062
63
class TestSFbugs(unittest.TestCase):
    """Regression tests, one per individually reported difflib bug."""

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        # Both generators must simply be exhausted on empty input.
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_matching_blocks_cache(self):
        # Issue #21635
        # get_matching_blocks() is called twice on the same matcher; the
        # repeated call must return blocks that compare equal to the first
        # result and still expose the .size attribute.
        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
        first = s.get_matching_blocks()
        second = s.get_matching_blocks()
        self.assertEqual(first, second)
        self.assertEqual(second[0].size, 2)
        self.assertEqual(second[1].size, 2)
        self.assertEqual(second[2].size, 0)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        diff = list(difflib.Differ().compare(["\tI am a buggy"],
                                             ["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])
        # The guide line keeps the original leading tab, then pads with one
        # space per character of "I am a bug" so that "--" lines up under
        # the deleted "gy".  The padding is spelled out as a multiplication
        # so its width cannot be silently altered by whitespace mangling.
        self.assertEqual("? \t" + " " * 10 + "--\n", diff[1])
        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])

    def test_hint_indented_properly_with_tabs(self):
        # Only a trailing newline is added, so the single guide line belongs
        # to the "+" line and mirrors its tab/space indentation.
        diff = list(difflib.Differ().compare(["\t \t \t^"], ["\t \t \t^\n"]))
        self.assertEqual("- \t \t \t^", diff[0])
        self.assertEqual("+ \t \t \t^\n", diff[1])
        self.assertEqual("? \t \t \t +\n", diff[2])

    def test_mdiff_catch_stop_iteration(self):
        # Issue #33224
        self.assertEqual(
            list(difflib._mdiff(["2"], ["3"], 1)),
            [((1, '\x00-2\x01'), (1, '\x00+3\x01'), True)],
        )
108
109
# Fixture texts consumed by TestSFpatches (SF patch 914575, HTML diffs).
#
# NOTE(review): runs of spaces inside these literals look collapsed in this
# copy of the file.  The bracketed legends in the *_from2 / *_to2 lines
# (e.g. "from:[tt] to:[ssss]") appear to spell out the intended whitespace
# one letter per character (s = space, t = tab) -- TODO confirm, and restore
# the exact whitespace from the canonical file before trusting the baseline
# comparison in test_html_diff.

# Pair 1: plain ASCII text with a changed, a removed and an added line.
patch914575_from1 = """
 1. Beautiful is beTTer than ugly.
 2. Explicit is better than implicit.
 3. Simple is better than complex.
 4. Complex is better than complicated.
"""

patch914575_to1 = """
 1. Beautiful is better than ugly.
 3. Simple is better than complex.
 4. Complicated is better than complex.
 5. Flat is better than nested.
"""

# Same shape as pair 1, but containing non-ASCII characters.
patch914575_nonascii_from1 = """
 1. Beautiful is beTTer than ugly.
 2. Explicit is better than ımplıcıt.
 3. Simple is better than complex.
 4. Complex is better than complicated.
"""

patch914575_nonascii_to1 = """
 1. Beautiful is better than ügly.
 3. Sımple is better than complex.
 4. Complicated is better than cömplex.
 5. Flat is better than nested.
"""

# Pair 2: lines that differ only in their tab/space whitespace.  The "\t"
# sequences are escapes in a normal (non-raw) string, i.e. real tabs.
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
 \t\tLine 2: preceded by from:[sstt] to:[sssst]
 \t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4: \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
 Line 1: preceded by from:[tt] to:[ssss]
 \tLine 2: preceded by from:[sstt] to:[sssst]
 Line 3: preceded by from:[sstst] to:[ssssss]
Line 4: has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

# Pair 3: mixed short and long lines; test_html_diff renders these with
# wrapcolumn=14.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4 changed
line 5 changed
line 6 changed
line 7
line 8 subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2 added
line 3
line 4 chanGEd
line 5a chanGed
line 6a changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
187
class TestSFpatches(unittest.TestCase):
    """Tests originating from SourceForge patches, mostly around HtmlDiff."""

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        f1a = ((patch914575_from1 + '123\n'*10)*3)
        t1a = (patch914575_to1 + '123\n'*10)*3
        f1b = '456\n'*10 + f1a
        t1b = '456\n'*10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        # One full page, into which every table variant below is spliced
        # just before </body> so a single baseline file covers them all.
        full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
        tables = '\n'.join(
            [
             '<h2>Context (first diff within numlines=5(default))</h2>',
             i.make_table(f1a,t1a,'from','to',context=True),
             '<h2>Context (first diff after numlines=5(default))</h2>',
             i.make_table(f1b,t1b,'from','to',context=True),
             '<h2>Context (numlines=6)</h2>',
             i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
             '<h2>Context (numlines=0)</h2>',
             i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
             '<h2>Same Context</h2>',
             i.make_table(f1a,f1a,'from','to',context=True),
             '<h2>Same Full</h2>',
             i.make_table(f1a,f1a,'from','to',context=False),
             '<h2>Empty Context</h2>',
             i.make_table([],[],'from','to',context=True),
             '<h2>Empty Full</h2>',
             i.make_table([],[],'from','to',context=False),
             '<h2>tabsize=2</h2>',
             j.make_table(f2,t2),
             '<h2>tabsize=default</h2>',
             i.make_table(f2,t2),
             '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
             '<h2>wrapcolumn=14,splitlines()</h2>',
             k.make_table(f3.splitlines(),t3.splitlines()),
             '<h2>wrapcolumn=14,splitlines(True)</h2>',
             k.make_table(f3.splitlines(True),t3.splitlines(True)),
             ])
        actual = full.replace('</body>','\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html', 'w', encoding='utf-8') as fp:
        #    fp.write(actual)

        # Decode the baseline explicitly instead of relying on the locale's
        # default encoding, so the comparison does not depend on the
        # environment the tests run in.
        with open(findfile('test_difflib_expect.html'), encoding='utf-8') as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        # Twice as many elements as the recursion limit: odd positions hold
        # identical "K:" entries, even positions hold differing "V:" ones.
        limit = sys.getrecursionlimit()
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit*2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit*2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()

    def test_make_file_default_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines())
        self.assertIn('content="text/html; charset=utf-8"', output)

    def test_make_file_iso88591_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines(),
                                     charset='iso-8859-1')
        self.assertIn('content="text/html; charset=iso-8859-1"', output)

    def test_make_file_usascii_charset_with_nonascii_input(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
                                     patch914575_nonascii_to1.splitlines(),
                                     charset='us-ascii')
        self.assertIn('content="text/html; charset=us-ascii"', output)
        # Characters outside the charset must show up as numeric references.
        self.assertIn('&#305;mpl&#305;c&#305;t', output)
274
Gustavo Niemeyer548148812006-01-31 18:34:13 +0000275
class TestOutputFormat(unittest.TestCase):
    """Checks on the header lines and range fields of unified/context diffs."""

    def test_tab_delimiter(self):
        # File name and date in each header line are separated by a tab.
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        # Without dates the header must end right after the file name.
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number>  if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines> otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty
        #   range starts the file.
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3,3), '3,0')
        self.assertEqual(fmt(3,4), '4')
        self.assertEqual(fmt(3,5), '4,2')
        self.assertEqual(fmt(3,6), '4,3')
        self.assertEqual(fmt(0,0), '0,0')

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #     "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #     "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3,3), '3')
        self.assertEqual(fmt(3,4), '4')
        self.assertEqual(fmt(3,5), '4,5')
        self.assertEqual(fmt(3,6), '4,6')
        self.assertEqual(fmt(0,0), '0')
337
338
class TestBytes(unittest.TestCase):
    """Exercise difflib.diff_bytes() and the str/bytes consistency checks."""

    # don't really care about the content of the output, just the fact
    # that it's bytes and we don't crash
    def check(self, diff):
        """Consume *diff* and assert that every line it yields is bytes."""
        diff = list(diff)   # trigger exceptions first
        for line in diff:
            self.assertIsInstance(
                line, bytes,
                "all lines of diff should be bytes, but got: %r" % line)

    def test_byte_content(self):
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes

        unified = difflib.unified_diff
        context = difflib.context_diff

        check = self.check
        check(difflib.diff_bytes(unified, a, a))
        check(difflib.diff_bytes(unified, a, b))

        # now with filenames (content and filenames are all bytes!)
        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))

        # and with filenames and dates
        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))

        # same all over again, with context diff
        check(difflib.diff_bytes(context, a, a))
        check(difflib.diff_bytes(context, a, b))
        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))

    def test_byte_filenames(self):
        # somebody renamed a file from ISO-8859-2 to UTF-8
        fna = b'\xb3odz.txt'    # "łodz.txt"
        fnb = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        a = [b'\xa3odz is a city in Poland.']
        b = [b'\xc5\x81odz is a city in Poland.']

        check = self.check
        unified = difflib.unified_diff
        context = difflib.context_diff
        check(difflib.diff_bytes(unified, a, b, fna, fnb))
        check(difflib.diff_bytes(context, a, b, fna, fnb))

        def assertDiff(expect, actual):
            # do not compare expect and actual as lists, because unittest
            # uses difflib to report difference between lists
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for e, a in zip(expect, actual):
                self.assertEqual(e, a)

        expect = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
        assertDiff(expect, actual)

        # with dates (plain ASCII)
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))

        expect = [
            # note the mixed encodings here: this is deeply wrong by every
            # tenet of Unicode, but it doesn't crash, it's parseable by
            # patch, and it's how UNIX(tm) diff behaves
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
                                    lineterm=b'')
        assertDiff(expect, actual)

    def test_mixed_types_content(self):
        # type of input content must be consistent: all str or all bytes
        a = [b'hello']
        b = ['hello']

        unified = difflib.unified_diff
        context = difflib.context_diff

        # Called directly, the diff functions complain about the bytes item.
        expect = "lines to compare must be str, not bytes (b'hello')"
        self._assert_type_error(expect, unified, a, b)
        self._assert_type_error(expect, unified, b, a)
        self._assert_type_error(expect, context, a, b)
        self._assert_type_error(expect, context, b, a)

        # Through diff_bytes it is the str item that is rejected instead.
        expect = "all arguments must be bytes, not str ('hello')"
        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)

    def test_mixed_types_filenames(self):
        # cannot pass filenames as bytes if content is str (this may not be
        # the right behaviour, but at least the test demonstrates how
        # things work)
        a = ['hello\n']
        b = ['ohell\n']
        fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
        fnb = b'ol\xc3\xa9.txt' # to UTF-8
        self._assert_type_error(
            "all arguments must be str, not: b'ol\\xe9.txt'",
            difflib.unified_diff, a, b, fna, fnb)

    def test_mixed_types_dates(self):
        # type of dates must be consistent with type of contents
        a = [b'foo\n']
        b = [b'bar\n']
        datea = '1 fév'
        dateb = '3 fév'
        self._assert_type_error(
            "all arguments must be bytes, not str ('1 fév')",
            difflib.diff_bytes, difflib.unified_diff,
            a, b, b'a', b'b', datea, dateb)

        # if input is str, non-ASCII dates are fine
        a = ['foo\n']
        b = ['bar\n']
        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))

    def _assert_type_error(self, msg, generator, *args):
        """Assert that consuming generator(*args) raises TypeError(msg)."""
        with self.assertRaises(TypeError) as ctx:
            list(generator(*args))
        self.assertEqual(msg, str(ctx.exception))
482
class TestJunkAPIs(unittest.TestCase):
    """Checks for the IS_LINE_JUNK and IS_CHARACTER_JUNK predicates."""

    def test_is_line_junk_true(self):
        junk_lines = ('#', ' ', ' #', '# ', ' # ', '')
        for candidate in junk_lines:
            verdict = difflib.IS_LINE_JUNK(candidate)
            self.assertTrue(verdict, repr(candidate))

    def test_is_line_junk_false(self):
        meaningful_lines = ('##', ' ##', '## ', 'abc ', 'abc #',
                            'Mr. Moose is up!')
        for candidate in meaningful_lines:
            verdict = difflib.IS_LINE_JUNK(candidate)
            self.assertFalse(verdict, repr(candidate))

    def test_is_line_junk_REDOS(self):
        # A million tabs followed by '##'; the line must be reported as
        # not junk.
        padding = '\t' * 1000000
        evil_input = padding + '##'
        self.assertFalse(difflib.IS_LINE_JUNK(evil_input))

    def test_is_character_junk_true(self):
        junk_chars = (' ', '\t')
        for candidate in junk_chars:
            verdict = difflib.IS_CHARACTER_JUNK(candidate)
            self.assertTrue(verdict, repr(candidate))

    def test_is_character_junk_false(self):
        meaningful_chars = ('a', '#', '\n', '\f', '\r', '\v')
        for candidate in meaningful_chars:
            verdict = difflib.IS_CHARACTER_JUNK(candidate)
            self.assertFalse(verdict, repr(candidate))
Greg Ward4d9d2562015-04-20 20:21:21 -0400503
def test_main():
    """Run this module's test cases together with difflib's doctests."""
    # Reset the class-level counter before anything is rendered.
    # NOTE(review): presumably this keeps generated HTML identifiers
    # reproducible for the stored baseline -- confirm against HtmlDiff.
    difflib.HtmlDiff._default_prefix = 0

    suites = [
        TestWithAscii,
        TestAutojunk,
        TestSFpatches,
        TestSFbugs,
        TestOutputFormat,
        TestBytes,
        TestJunkAPIs,
    ]
    # The doctests embedded in difflib itself run last.
    suites.append(doctest.DocTestSuite(difflib))
    run_unittest(*suites)


if __name__ == '__main__':
    test_main()