Barry Warsaw | 04f357c | 2002-07-23 19:04:11 +0000 | [diff] [blame] | 1 | import difflib |
Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 2 | from test.support import run_unittest, findfile |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 3 | import unittest |
Raymond Hettinger | 43d790c | 2003-07-16 04:34:56 +0000 | [diff] [blame] | 4 | import doctest |
Gustavo Niemeyer | 54814881 | 2006-01-31 18:34:13 +0000 | [diff] [blame] | 5 | import sys |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 6 | |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 7 | |
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher checks on short runs of plain ASCII characters."""

    def test_one_insert(self):
        base = 'b' * 100

        # Extra character at the very front of the second sequence.
        matcher = difflib.SequenceMatcher(None, base, 'a' + base)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        front_ops = [
            ('insert', 0, 0, 0, 1),
            ('equal', 0, 100, 1, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), front_ops)
        self.assertEqual(matcher.bpopular, set())

        # Extra character in the middle of the second sequence.
        half = 'b' * 50
        matcher = difflib.SequenceMatcher(None, base, half + 'a' + half)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        middle_ops = [
            ('equal', 0, 50, 0, 50),
            ('insert', 50, 50, 50, 51),
            ('equal', 50, 100, 51, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), middle_ops)
        self.assertEqual(matcher.bpopular, set())

    def test_one_delete(self):
        # The first sequence carries one 'c' that the second one lacks.
        head = 'a' * 40
        tail = 'b' * 40
        matcher = difflib.SequenceMatcher(None, head + 'c' + tail, head + tail)
        self.assertAlmostEqual(matcher.ratio(), 0.994, places=3)
        expected_ops = [
            ('equal', 0, 40, 0, 40),
            ('delete', 40, 41, 40, 40),
            ('equal', 41, 81, 40, 80),
        ]
        self.assertEqual(list(matcher.get_opcodes()), expected_ops)

    def test_bjunk(self):
        first = 'a' * 40 + 'b' * 40
        second = 'a' * 44 + 'b' * 40
        padded = second + ' ' * 20

        # The junk predicate matches nothing that occurs in b.
        matcher = difflib.SequenceMatcher(
            isjunk=lambda x: x == ' ', a=first, b=second)
        self.assertEqual(matcher.bjunk, set())

        # b now ends in spaces, which the predicate flags.
        matcher = difflib.SequenceMatcher(
            isjunk=lambda x: x == ' ', a=first, b=padded)
        self.assertEqual(matcher.bjunk, {' '})

        # Two different characters flagged as junk.
        matcher = difflib.SequenceMatcher(
            isjunk=lambda x: x in [' ', 'b'], a=first, b=padded)
        self.assertEqual(matcher.bjunk, {' ', 'b'})
| 44 | |
Terry Reedy | 99f9637 | 2010-11-25 06:12:34 +0000 | [diff] [blame] | 45 | |
class TestAutojunk(unittest.TestCase):
    """Exercise SequenceMatcher's ``autojunk`` switch (new in 2.7)."""

    def test_one_insert_homogenous_sequence(self):
        shorter = 'b' * 200
        longer = 'a' + shorter

        # autojunk defaults to True; at 200+ elements the heuristic applies
        # and 'b' ends up in bpopular.
        with_heuristic = difflib.SequenceMatcher(None, shorter, longer)
        self.assertAlmostEqual(with_heuristic.ratio(), 0, places=3)
        self.assertEqual(with_heuristic.bpopular, {'b'})

        # Same input with the heuristic switched off.
        without_heuristic = difflib.SequenceMatcher(
            None, shorter, longer, autojunk=False)
        self.assertAlmostEqual(without_heuristic.ratio(), 0.9975, places=3)
        self.assertEqual(without_heuristic.bpopular, set())
Terry Reedy | 99f9637 | 2010-11-25 06:12:34 +0000 | [diff] [blame] | 62 | |
| 63 | |
class TestSFbugs(unittest.TestCase):
    """Regression checks tied to individual difflib bug reports."""

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_matching_blocks_cache(self):
        # Issue #21635
        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
        # The result of the first call is deliberately discarded: the
        # assertions target what a *repeated* call returns.
        s.get_matching_blocks()
        second = s.get_matching_blocks()
        self.assertEqual(second[0].size, 2)
        self.assertEqual(second[1].size, 2)
        self.assertEqual(second[2].size, 0)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        diff = list(difflib.Differ().compare(["\tI am a buggy"],
                                             ["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])
        # The "--" marker has to sit under the trailing "gy": eleven
        # characters ("\tI am a bug") precede it on the "- " line, so the
        # hint needs eleven padding columns after its "? " prefix.  Tabs are
        # folded to spaces before comparing so the check only pins the
        # column, not which whitespace character Differ pads with.
        self.assertEqual("? " + " " * 11 + "--\n", diff[1].replace("\t", " "))
        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])
| 95 | |
# Fixture texts for the HtmlDiff tests in TestSFpatches.
#
# NOTE(review): the bracketed hints in the *_from2/*_to2 lines (e.g.
# "to:[ssss]") describe leading spaces/tabs that are not visible in these
# literals as they stand, so whitespace inside the fixtures may have been
# collapsed in this copy -- confirm against the HTML baseline file that
# test_html_diff compares with before trusting them.

# Pair 1: a short numbered list with edited, removed and added lines.
patch914575_from1 = """
1. Beautiful is beTTer than ugly.
2. Explicit is better than implicit.
3. Simple is better than complex.
4. Complex is better than complicated.
"""

patch914575_to1 = """
1. Beautiful is better than ügly.
3. Simple is better than complex.
4. Complicated is better than complex.
5. Flat is better than nested.
"""

# Same list with non-ASCII letters; fed to make_file() with charset='us-ascii'.
patch914575_nonascii_from1 = """
1. Beautiful is beTTer than ugly.
2. Explicit is better than ımplıcıt.
3. Simple is better than complex.
4. Complex is better than complicated.
"""

patch914575_nonascii_to1 = """
1. Beautiful is better than ügly.
3. Sımple is better than complex.
4. Complicated is better than cömplex.
5. Flat is better than nested.
"""

# Pair 2: lines differing in tabs vs. spaces; rendered with tabsize=2 and
# with the default tab size.
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
\t\tLine 2: preceded by from:[sstt] to:[sssst]
\t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4: \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
Line 1: preceded by from:[tt] to:[ssss]
\tLine 2: preceded by from:[sstt] to:[sssst]
Line 3: preceded by from:[sstst] to:[ssssss]
Line 4: has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

# Pair 3: long and short lines; rendered with wrapcolumn=14.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4 changed
line 5 changed
line 6 changed
line 7
line 8 subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2 added
line 3
line 4 chanGEd
line 5a chanGed
line 6a changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
| 173 | |
class TestSFpatches(unittest.TestCase):
    """HtmlDiff and SequenceMatcher checks that came in through patches.

    The HtmlDiff tests read the module-level ``patch914575_*`` fixtures.
    """

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        f1a = ((patch914575_from1 + '123\n'*10)*3)
        t1a = (patch914575_to1 + '123\n'*10)*3
        f1b = '456\n'*10 + f1a
        t1b = '456\n'*10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
        tables = '\n'.join(
            [
             '<h2>Context (first diff within numlines=5(default))</h2>',
             i.make_table(f1a,t1a,'from','to',context=True),
             '<h2>Context (first diff after numlines=5(default))</h2>',
             i.make_table(f1b,t1b,'from','to',context=True),
             '<h2>Context (numlines=6)</h2>',
             i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
             '<h2>Context (numlines=0)</h2>',
             i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
             '<h2>Same Context</h2>',
             i.make_table(f1a,f1a,'from','to',context=True),
             '<h2>Same Full</h2>',
             i.make_table(f1a,f1a,'from','to',context=False),
             '<h2>Empty Context</h2>',
             i.make_table([],[],'from','to',context=True),
             '<h2>Empty Full</h2>',
             i.make_table([],[],'from','to',context=False),
             '<h2>tabsize=2</h2>',
             j.make_table(f2,t2),
             '<h2>tabsize=default</h2>',
             i.make_table(f2,t2),
             '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
             '<h2>wrapcolumn=14,splitlines()</h2>',
             k.make_table(f3.splitlines(),t3.splitlines()),
             '<h2>wrapcolumn=14,splitlines(True)</h2>',
             k.make_table(f3.splitlines(True),t3.splitlines(True)),
             ])
        # Splice the extra tables into the full page just before </body>.
        actual = full.replace('</body>','\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w', encoding="utf-8") as fp:
        #    fp.write(actual)

        # Read the baseline with an explicit encoding so the comparison does
        # not depend on the locale; utf-8 is the charset make_file() declares
        # by default (see test_make_file_default_charset).
        with open(findfile('test_difflib_expect.html'), encoding="utf-8") as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        # Passing means get_opcodes() returns without raising on inputs
        # twice as long as the interpreter's recursion limit.
        limit = sys.getrecursionlimit()
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit*2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit*2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()

    def test_make_file_default_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines())
        self.assertIn('content="text/html; charset=utf-8"', output)

    def test_make_file_iso88591_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines(),
                                     charset='iso-8859-1')
        self.assertIn('content="text/html; charset=iso-8859-1"', output)

    def test_make_file_usascii_charset_with_nonascii_input(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
                                     patch914575_nonascii_to1.splitlines(),
                                     charset='us-ascii')
        self.assertIn('content="text/html; charset=us-ascii"', output)
        # A us-ascii page cannot carry U+0131 (dotless i) literally, so the
        # fixture word is expected as numeric character references.
        self.assertIn('&#305;mpl&#305;c&#305;t', output)
| 260 | |
Gustavo Niemeyer | 54814881 | 2006-01-31 18:34:13 +0000 | [diff] [blame] | 261 | |
class TestOutputFormat(unittest.TestCase):
    """Header and range formatting of unified_diff() and context_diff()."""

    def test_tab_delimiter(self):
        # File name and date in each header line are separated by a tab.
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        # Without dates the header lines end right after the file name.
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number> if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines> otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty
        #   range starts the file.
        fmt = difflib._format_range_unified
        cases = [
            ((3, 3), '3,0'),
            ((3, 4), '4'),
            ((3, 5), '4,2'),
            ((3, 6), '4,3'),
            ((0, 0), '0,0'),
        ]
        for (start, stop), expected in cases:
            with self.subTest(start=start, stop=stop):
                self.assertEqual(fmt(start, stop), expected)

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #     "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #     "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        cases = [
            ((3, 3), '3'),
            ((3, 4), '4'),
            ((3, 5), '4,5'),
            ((3, 6), '4,6'),
            ((0, 0), '0'),
        ]
        for (start, stop), expected in cases:
            with self.subTest(start=start, stop=stop):
                self.assertEqual(fmt(start, stop), expected)
| 323 | |
| 324 | |
class TestBytes(unittest.TestCase):
    """Checks for difflib.diff_bytes() and for mixing str with bytes."""

    # don't really care about the content of the output, just the fact
    # that it's bytes and we don't crash
    def check(self, diff):
        """Consume *diff* and assert that every produced line is bytes."""
        diff = list(diff)   # trigger exceptions first
        for line in diff:
            self.assertIsInstance(
                line, bytes,
                "all lines of diff should be bytes, but got: %r" % line)

    def test_byte_content(self):
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes

        check = self.check
        # unified diff first, then the same all over again with context diff
        for dfunc in (difflib.unified_diff, difflib.context_diff):
            check(difflib.diff_bytes(dfunc, a, a))
            check(difflib.diff_bytes(dfunc, a, b))

            # now with filenames (content and filenames are all bytes!)
            check(difflib.diff_bytes(dfunc, a, a, b'a', b'a'))
            check(difflib.diff_bytes(dfunc, a, b, b'a', b'b'))

            # and with filenames and dates
            check(difflib.diff_bytes(dfunc, a, a, b'a', b'a', b'2005', b'2013'))
            check(difflib.diff_bytes(dfunc, a, b, b'a', b'b', b'2005', b'2013'))

    def test_byte_filenames(self):
        # somebody renamed a file from ISO-8859-2 to UTF-8
        fna = b'\xb3odz.txt'    # "łodz.txt"
        fnb = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        a = [b'\xa3odz is a city in Poland.']
        b = [b'\xc5\x81odz is a city in Poland.']

        check = self.check
        unified = difflib.unified_diff
        context = difflib.context_diff
        check(difflib.diff_bytes(unified, a, b, fna, fnb))
        check(difflib.diff_bytes(context, a, b, fna, fnb))

        def assertDiff(expect, actual):
            # do not compare expect and actual as lists, because unittest
            # uses difflib to report difference between lists
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for e, a in zip(expect, actual):
                self.assertEqual(e, a)

        expect = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
        assertDiff(expect, actual)

        # with dates (plain ASCII)
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))

        expect = [
            # note the mixed encodings here: this is deeply wrong by every
            # tenet of Unicode, but it doesn't crash, it's parseable by
            # patch, and it's how UNIX(tm) diff behaves
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
                                    lineterm=b'')
        assertDiff(expect, actual)

    def test_mixed_types_content(self):
        # type of input content must be consistent: all str or all bytes
        a = [b'hello']
        b = ['hello']

        unified = difflib.unified_diff
        context = difflib.context_diff

        expect = "lines to compare must be str, not bytes (b'hello')"
        self._assert_type_error(expect, unified, a, b)
        self._assert_type_error(expect, unified, b, a)
        self._assert_type_error(expect, context, a, b)
        self._assert_type_error(expect, context, b, a)

        expect = "all arguments must be bytes, not str ('hello')"
        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)

    def test_mixed_types_filenames(self):
        # cannot pass filenames as bytes if content is str (this may not be
        # the right behaviour, but at least the test demonstrates how
        # things work)
        a = ['hello\n']
        b = ['ohell\n']
        fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
        fnb = b'ol\xc3\xa9.txt' # to UTF-8
        self._assert_type_error(
            "all arguments must be str, not: b'ol\\xe9.txt'",
            difflib.unified_diff, a, b, fna, fnb)

    def test_mixed_types_dates(self):
        # type of dates must be consistent with type of contents
        a = [b'foo\n']
        b = [b'bar\n']
        datea = '1 fév'
        dateb = '3 fév'
        self._assert_type_error(
            "all arguments must be bytes, not str ('1 fév')",
            difflib.diff_bytes, difflib.unified_diff,
            a, b, b'a', b'b', datea, dateb)

        # if input is str, non-ASCII dates are fine
        a = ['foo\n']
        b = ['bar\n']
        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))

    def _assert_type_error(self, msg, generator, *args):
        """Assert that consuming generator(*args) raises TypeError(msg)."""
        with self.assertRaises(TypeError) as ctx:
            list(generator(*args))
        self.assertEqual(msg, str(ctx.exception))
| 468 | |
| 469 | |
def test_main():
    """Run this module's test classes together with difflib's doctests."""
    # Set before any suite is built or run.
    # NOTE(review): presumably keeps generated HtmlDiff output reproducible
    # for the baseline comparison -- confirm.
    difflib.HtmlDiff._default_prefix = 0
    suites = (
        TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
        TestOutputFormat, TestBytes, doctest.DocTestSuite(difflib),
    )
    run_unittest(*suites)


if __name__ == '__main__':
    test_main()