| import difflib |
| from test.support import run_unittest, findfile |
| import unittest |
| import doctest |
| import sys |
| |
| |
| class TestWithAscii(unittest.TestCase): |
| def test_one_insert(self): |
| sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100) |
| self.assertAlmostEqual(sm.ratio(), 0.995, places=3) |
| self.assertEqual(list(sm.get_opcodes()), |
| [ ('insert', 0, 0, 0, 1), |
| ('equal', 0, 100, 1, 101)]) |
| self.assertEqual(sm.bpopular, set()) |
| sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50) |
| self.assertAlmostEqual(sm.ratio(), 0.995, places=3) |
| self.assertEqual(list(sm.get_opcodes()), |
| [ ('equal', 0, 50, 0, 50), |
| ('insert', 50, 50, 50, 51), |
| ('equal', 50, 100, 51, 101)]) |
| self.assertEqual(sm.bpopular, set()) |
| |
| def test_one_delete(self): |
| sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40) |
| self.assertAlmostEqual(sm.ratio(), 0.994, places=3) |
| self.assertEqual(list(sm.get_opcodes()), |
| [ ('equal', 0, 40, 0, 40), |
| ('delete', 40, 41, 40, 40), |
| ('equal', 41, 81, 40, 80)]) |
| |
| def test_bjunk(self): |
| sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ', |
| a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40) |
| self.assertEqual(sm.bjunk, set()) |
| |
| sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ', |
| a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20) |
| self.assertEqual(sm.bjunk, {' '}) |
| |
| sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'], |
| a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20) |
| self.assertEqual(sm.bjunk, {' ', 'b'}) |
| |
| |
| class TestAutojunk(unittest.TestCase): |
| """Tests for the autojunk parameter added in 2.7""" |
| def test_one_insert_homogenous_sequence(self): |
| # By default autojunk=True and the heuristic kicks in for a sequence |
| # of length 200+ |
| seq1 = 'b' * 200 |
| seq2 = 'a' + 'b' * 200 |
| |
| sm = difflib.SequenceMatcher(None, seq1, seq2) |
| self.assertAlmostEqual(sm.ratio(), 0, places=3) |
| self.assertEqual(sm.bpopular, {'b'}) |
| |
| # Now turn the heuristic off |
| sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False) |
| self.assertAlmostEqual(sm.ratio(), 0.9975, places=3) |
| self.assertEqual(sm.bpopular, set()) |
| |
| |
| class TestSFbugs(unittest.TestCase): |
| def test_ratio_for_null_seqn(self): |
| # Check clearing of SF bug 763023 |
| s = difflib.SequenceMatcher(None, [], []) |
| self.assertEqual(s.ratio(), 1) |
| self.assertEqual(s.quick_ratio(), 1) |
| self.assertEqual(s.real_quick_ratio(), 1) |
| |
| def test_comparing_empty_lists(self): |
| # Check fix for bug #979794 |
| group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes() |
| self.assertRaises(StopIteration, next, group_gen) |
| diff_gen = difflib.unified_diff([], []) |
| self.assertRaises(StopIteration, next, diff_gen) |
| |
| def test_matching_blocks_cache(self): |
| # Issue #21635 |
| s = difflib.SequenceMatcher(None, "abxcd", "abcd") |
| first = s.get_matching_blocks() |
| second = s.get_matching_blocks() |
| self.assertEqual(second[0].size, 2) |
| self.assertEqual(second[1].size, 2) |
| self.assertEqual(second[2].size, 0) |
| |
| def test_added_tab_hint(self): |
| # Check fix for bug #1488943 |
| diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"])) |
| self.assertEqual("- \tI am a buggy", diff[0]) |
| self.assertEqual("? --\n", diff[1]) |
| self.assertEqual("+ \t\tI am a bug", diff[2]) |
| self.assertEqual("? +\n", diff[3]) |
| |
| patch914575_from1 = """ |
| 1. Beautiful is beTTer than ugly. |
| 2. Explicit is better than implicit. |
| 3. Simple is better than complex. |
| 4. Complex is better than complicated. |
| """ |
| |
| patch914575_to1 = """ |
| 1. Beautiful is better than ugly. |
| 3. Simple is better than complex. |
| 4. Complicated is better than complex. |
| 5. Flat is better than nested. |
| """ |
| |
| patch914575_nonascii_from1 = """ |
| 1. Beautiful is beTTer than ugly. |
| 2. Explicit is better than ımplıcıt. |
| 3. Simple is better than complex. |
| 4. Complex is better than complicated. |
| """ |
| |
| patch914575_nonascii_to1 = """ |
| 1. Beautiful is better than ügly. |
| 3. Sımple is better than complex. |
| 4. Complicated is better than cömplex. |
| 5. Flat is better than nested. |
| """ |
| |
| patch914575_from2 = """ |
| \t\tLine 1: preceeded by from:[tt] to:[ssss] |
| \t\tLine 2: preceeded by from:[sstt] to:[sssst] |
| \t \tLine 3: preceeded by from:[sstst] to:[ssssss] |
| Line 4: \thas from:[sst] to:[sss] after : |
| Line 5: has from:[t] to:[ss] at end\t |
| """ |
| |
| patch914575_to2 = """ |
| Line 1: preceeded by from:[tt] to:[ssss] |
| \tLine 2: preceeded by from:[sstt] to:[sssst] |
| Line 3: preceeded by from:[sstst] to:[ssssss] |
| Line 4: has from:[sst] to:[sss] after : |
| Line 5: has from:[t] to:[ss] at end |
| """ |
| |
| patch914575_from3 = """line 0 |
| 1234567890123456789012345689012345 |
| line 1 |
| line 2 |
| line 3 |
| line 4 changed |
| line 5 changed |
| line 6 changed |
| line 7 |
| line 8 subtracted |
| line 9 |
| 1234567890123456789012345689012345 |
| short line |
| just fits in!! |
| just fits in two lines yup!! |
| the end""" |
| |
| patch914575_to3 = """line 0 |
| 1234567890123456789012345689012345 |
| line 1 |
| line 2 added |
| line 3 |
| line 4 chanGEd |
| line 5a chanGed |
| line 6a changEd |
| line 7 |
| line 8 |
| line 9 |
| 1234567890 |
| another long line that needs to be wrapped |
| just fitS in!! |
| just fits in two lineS yup!! |
| the end""" |
| |
| class TestSFpatches(unittest.TestCase): |
| |
| def test_html_diff(self): |
| # Check SF patch 914575 for generating HTML differences |
| f1a = ((patch914575_from1 + '123\n'*10)*3) |
| t1a = (patch914575_to1 + '123\n'*10)*3 |
| f1b = '456\n'*10 + f1a |
| t1b = '456\n'*10 + t1a |
| f1a = f1a.splitlines() |
| t1a = t1a.splitlines() |
| f1b = f1b.splitlines() |
| t1b = t1b.splitlines() |
| f2 = patch914575_from2.splitlines() |
| t2 = patch914575_to2.splitlines() |
| f3 = patch914575_from3 |
| t3 = patch914575_to3 |
| i = difflib.HtmlDiff() |
| j = difflib.HtmlDiff(tabsize=2) |
| k = difflib.HtmlDiff(wrapcolumn=14) |
| |
| full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5) |
| tables = '\n'.join( |
| [ |
| '<h2>Context (first diff within numlines=5(default))</h2>', |
| i.make_table(f1a,t1a,'from','to',context=True), |
| '<h2>Context (first diff after numlines=5(default))</h2>', |
| i.make_table(f1b,t1b,'from','to',context=True), |
| '<h2>Context (numlines=6)</h2>', |
| i.make_table(f1a,t1a,'from','to',context=True,numlines=6), |
| '<h2>Context (numlines=0)</h2>', |
| i.make_table(f1a,t1a,'from','to',context=True,numlines=0), |
| '<h2>Same Context</h2>', |
| i.make_table(f1a,f1a,'from','to',context=True), |
| '<h2>Same Full</h2>', |
| i.make_table(f1a,f1a,'from','to',context=False), |
| '<h2>Empty Context</h2>', |
| i.make_table([],[],'from','to',context=True), |
| '<h2>Empty Full</h2>', |
| i.make_table([],[],'from','to',context=False), |
| '<h2>tabsize=2</h2>', |
| j.make_table(f2,t2), |
| '<h2>tabsize=default</h2>', |
| i.make_table(f2,t2), |
| '<h2>Context (wrapcolumn=14,numlines=0)</h2>', |
| k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0), |
| '<h2>wrapcolumn=14,splitlines()</h2>', |
| k.make_table(f3.splitlines(),t3.splitlines()), |
| '<h2>wrapcolumn=14,splitlines(True)</h2>', |
| k.make_table(f3.splitlines(True),t3.splitlines(True)), |
| ]) |
| actual = full.replace('</body>','\n%s\n</body>' % tables) |
| |
| # temporarily uncomment next two lines to baseline this test |
| #with open('test_difflib_expect.html','w') as fp: |
| # fp.write(actual) |
| |
| with open(findfile('test_difflib_expect.html')) as fp: |
| self.assertEqual(actual, fp.read()) |
| |
| def test_recursion_limit(self): |
| # Check if the problem described in patch #1413711 exists. |
| limit = sys.getrecursionlimit() |
| old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)] |
| new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)] |
| difflib.SequenceMatcher(None, old, new).get_opcodes() |
| |
| def test_make_file_default_charset(self): |
| html_diff = difflib.HtmlDiff() |
| output = html_diff.make_file(patch914575_from1.splitlines(), |
| patch914575_to1.splitlines()) |
| self.assertIn('content="text/html; charset=utf-8"', output) |
| |
| def test_make_file_iso88591_charset(self): |
| html_diff = difflib.HtmlDiff() |
| output = html_diff.make_file(patch914575_from1.splitlines(), |
| patch914575_to1.splitlines(), |
| charset='iso-8859-1') |
| self.assertIn('content="text/html; charset=iso-8859-1"', output) |
| |
| def test_make_file_usascii_charset_with_nonascii_input(self): |
| html_diff = difflib.HtmlDiff() |
| output = html_diff.make_file(patch914575_nonascii_from1.splitlines(), |
| patch914575_nonascii_to1.splitlines(), |
| charset='us-ascii') |
| self.assertIn('content="text/html; charset=us-ascii"', output) |
| self.assertIn('ımplıcıt', output) |
| |
| |
| class TestOutputFormat(unittest.TestCase): |
| def test_tab_delimiter(self): |
| args = ['one', 'two', 'Original', 'Current', |
| '2005-01-26 23:30:50', '2010-04-02 10:20:52'] |
| ud = difflib.unified_diff(*args, lineterm='') |
| self.assertEqual(list(ud)[0:2], [ |
| "--- Original\t2005-01-26 23:30:50", |
| "+++ Current\t2010-04-02 10:20:52"]) |
| cd = difflib.context_diff(*args, lineterm='') |
| self.assertEqual(list(cd)[0:2], [ |
| "*** Original\t2005-01-26 23:30:50", |
| "--- Current\t2010-04-02 10:20:52"]) |
| |
| def test_no_trailing_tab_on_empty_filedate(self): |
| args = ['one', 'two', 'Original', 'Current'] |
| ud = difflib.unified_diff(*args, lineterm='') |
| self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"]) |
| |
| cd = difflib.context_diff(*args, lineterm='') |
| self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"]) |
| |
| def test_range_format_unified(self): |
| # Per the diff spec at http://www.unix.org/single_unix_specification/ |
| spec = '''\ |
| Each <range> field shall be of the form: |
| %1d", <beginning line number> if the range contains exactly one line, |
| and: |
| "%1d,%1d", <beginning line number>, <number of lines> otherwise. |
| If a range is empty, its beginning line number shall be the number of |
| the line just before the range, or 0 if the empty range starts the file. |
| ''' |
| fmt = difflib._format_range_unified |
| self.assertEqual(fmt(3,3), '3,0') |
| self.assertEqual(fmt(3,4), '4') |
| self.assertEqual(fmt(3,5), '4,2') |
| self.assertEqual(fmt(3,6), '4,3') |
| self.assertEqual(fmt(0,0), '0,0') |
| |
| def test_range_format_context(self): |
| # Per the diff spec at http://www.unix.org/single_unix_specification/ |
| spec = '''\ |
| The range of lines in file1 shall be written in the following format |
| if the range contains two or more lines: |
| "*** %d,%d ****\n", <beginning line number>, <ending line number> |
| and the following format otherwise: |
| "*** %d ****\n", <ending line number> |
| The ending line number of an empty range shall be the number of the preceding line, |
| or 0 if the range is at the start of the file. |
| |
| Next, the range of lines in file2 shall be written in the following format |
| if the range contains two or more lines: |
| "--- %d,%d ----\n", <beginning line number>, <ending line number> |
| and the following format otherwise: |
| "--- %d ----\n", <ending line number> |
| ''' |
| fmt = difflib._format_range_context |
| self.assertEqual(fmt(3,3), '3') |
| self.assertEqual(fmt(3,4), '4') |
| self.assertEqual(fmt(3,5), '4,5') |
| self.assertEqual(fmt(3,6), '4,6') |
| self.assertEqual(fmt(0,0), '0') |
| |
| |
| class TestBytes(unittest.TestCase): |
| # don't really care about the content of the output, just the fact |
| # that it's bytes and we don't crash |
| def check(self, diff): |
| diff = list(diff) # trigger exceptions first |
| for line in diff: |
| self.assertIsInstance( |
| line, bytes, |
| "all lines of diff should be bytes, but got: %r" % line) |
| |
| def test_byte_content(self): |
| # if we receive byte strings, we return byte strings |
| a = [b'hello', b'andr\xe9'] # iso-8859-1 bytes |
| b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes |
| |
| unified = difflib.unified_diff |
| context = difflib.context_diff |
| |
| check = self.check |
| check(difflib.diff_bytes(unified, a, a)) |
| check(difflib.diff_bytes(unified, a, b)) |
| |
| # now with filenames (content and filenames are all bytes!) |
| check(difflib.diff_bytes(unified, a, a, b'a', b'a')) |
| check(difflib.diff_bytes(unified, a, b, b'a', b'b')) |
| |
| # and with filenames and dates |
| check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013')) |
| check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013')) |
| |
| # same all over again, with context diff |
| check(difflib.diff_bytes(context, a, a)) |
| check(difflib.diff_bytes(context, a, b)) |
| check(difflib.diff_bytes(context, a, a, b'a', b'a')) |
| check(difflib.diff_bytes(context, a, b, b'a', b'b')) |
| check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013')) |
| check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013')) |
| |
| def test_byte_filenames(self): |
| # somebody renamed a file from ISO-8859-2 to UTF-8 |
| fna = b'\xb3odz.txt' # "łodz.txt" |
| fnb = b'\xc5\x82odz.txt' |
| |
| # they transcoded the content at the same time |
| a = [b'\xa3odz is a city in Poland.'] |
| b = [b'\xc5\x81odz is a city in Poland.'] |
| |
| check = self.check |
| unified = difflib.unified_diff |
| context = difflib.context_diff |
| check(difflib.diff_bytes(unified, a, b, fna, fnb)) |
| check(difflib.diff_bytes(context, a, b, fna, fnb)) |
| |
| def assertDiff(expect, actual): |
| # do not compare expect and equal as lists, because unittest |
| # uses difflib to report difference between lists |
| actual = list(actual) |
| self.assertEqual(len(expect), len(actual)) |
| for e, a in zip(expect, actual): |
| self.assertEqual(e, a) |
| |
| expect = [ |
| b'--- \xb3odz.txt', |
| b'+++ \xc5\x82odz.txt', |
| b'@@ -1 +1 @@', |
| b'-\xa3odz is a city in Poland.', |
| b'+\xc5\x81odz is a city in Poland.', |
| ] |
| actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'') |
| assertDiff(expect, actual) |
| |
| # with dates (plain ASCII) |
| datea = b'2005-03-18' |
| dateb = b'2005-03-19' |
| check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb)) |
| check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb)) |
| |
| expect = [ |
| # note the mixed encodings here: this is deeply wrong by every |
| # tenet of Unicode, but it doesn't crash, it's parseable by |
| # patch, and it's how UNIX(tm) diff behaves |
| b'--- \xb3odz.txt\t2005-03-18', |
| b'+++ \xc5\x82odz.txt\t2005-03-19', |
| b'@@ -1 +1 @@', |
| b'-\xa3odz is a city in Poland.', |
| b'+\xc5\x81odz is a city in Poland.', |
| ] |
| actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb, |
| lineterm=b'') |
| assertDiff(expect, actual) |
| |
| def test_mixed_types_content(self): |
| # type of input content must be consistent: all str or all bytes |
| a = [b'hello'] |
| b = ['hello'] |
| |
| unified = difflib.unified_diff |
| context = difflib.context_diff |
| |
| expect = "lines to compare must be str, not bytes (b'hello')" |
| self._assert_type_error(expect, unified, a, b) |
| self._assert_type_error(expect, unified, b, a) |
| self._assert_type_error(expect, context, a, b) |
| self._assert_type_error(expect, context, b, a) |
| |
| expect = "all arguments must be bytes, not str ('hello')" |
| self._assert_type_error(expect, difflib.diff_bytes, unified, a, b) |
| self._assert_type_error(expect, difflib.diff_bytes, unified, b, a) |
| self._assert_type_error(expect, difflib.diff_bytes, context, a, b) |
| self._assert_type_error(expect, difflib.diff_bytes, context, b, a) |
| |
| def test_mixed_types_filenames(self): |
| # cannot pass filenames as bytes if content is str (this may not be |
| # the right behaviour, but at least the test demonstrates how |
| # things work) |
| a = ['hello\n'] |
| b = ['ohell\n'] |
| fna = b'ol\xe9.txt' # filename transcoded from ISO-8859-1 |
| fnb = b'ol\xc3a9.txt' # to UTF-8 |
| self._assert_type_error( |
| "all arguments must be str, not: b'ol\\xe9.txt'", |
| difflib.unified_diff, a, b, fna, fnb) |
| |
| def test_mixed_types_dates(self): |
| # type of dates must be consistent with type of contents |
| a = [b'foo\n'] |
| b = [b'bar\n'] |
| datea = '1 fév' |
| dateb = '3 fév' |
| self._assert_type_error( |
| "all arguments must be bytes, not str ('1 fév')", |
| difflib.diff_bytes, difflib.unified_diff, |
| a, b, b'a', b'b', datea, dateb) |
| |
| # if input is str, non-ASCII dates are fine |
| a = ['foo\n'] |
| b = ['bar\n'] |
| list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb)) |
| |
| def _assert_type_error(self, msg, generator, *args): |
| with self.assertRaises(TypeError) as ctx: |
| list(generator(*args)) |
| self.assertEqual(msg, str(ctx.exception)) |
| |
| |
| def test_main(): |
| difflib.HtmlDiff._default_prefix = 0 |
| Doctests = doctest.DocTestSuite(difflib) |
| run_unittest( |
| TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs, |
| TestOutputFormat, TestBytes, Doctests) |
| |
| if __name__ == '__main__': |
| test_main() |