Barry Warsaw | 04f357c | 2002-07-23 19:04:11 +0000 | [diff] [blame] | 1 | import difflib |
Benjamin Peterson | ee8712c | 2008-05-20 21:35:26 +0000 | [diff] [blame] | 2 | from test.support import run_unittest, findfile |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 3 | import unittest |
Raymond Hettinger | 43d790c | 2003-07-16 04:34:56 +0000 | [diff] [blame] | 4 | import doctest |
Gustavo Niemeyer | 54814881 | 2006-01-31 18:34:13 +0000 | [diff] [blame] | 5 | import sys |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 6 | |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 7 | |
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher checks on short ASCII strings (about 100 characters).

    Every case here also verifies that ``bpopular`` / ``bjunk`` hold exactly
    the elements the test expects.
    """

    def test_one_insert(self):
        """One extra character in ``b`` is reported as a single 'insert'."""
        # Extra character at the very start of b.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        # get_opcodes() already returns a list, so it is compared directly.
        self.assertEqual(sm.get_opcodes(),
                         [('insert', 0, 0, 0, 1),
                          ('equal', 0, 100, 1, 101)])
        self.assertEqual(sm.bpopular, set())

        # Extra character in the middle of b.
        sm = difflib.SequenceMatcher(None, 'b' * 100,
                                     'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(sm.get_opcodes(),
                         [('equal', 0, 50, 0, 50),
                          ('insert', 50, 50, 50, 51),
                          ('equal', 50, 100, 51, 101)])
        self.assertEqual(sm.bpopular, set())

    def test_one_delete(self):
        """One character missing from ``b`` is reported as a single 'delete'."""
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40,
                                     'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(sm.get_opcodes(),
                         [('equal', 0, 40, 0, 40),
                          ('delete', 40, 41, 40, 40),
                          ('equal', 41, 81, 40, 80)])

    def test_bjunk(self):
        """``bjunk`` contains exactly the junk elements that occur in ``b``."""
        # The junk predicate matches ' ', but b contains no blanks.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40)
        self.assertEqual(sm.bjunk, set())

        # Same predicate, b now ends with blanks.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' '})

        # Two junk characters, both present in b.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' ', 'b'})
Terry Reedy | 99f9637 | 2010-11-25 06:12:34 +0000 | [diff] [blame] | 45 | |
class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        """Compare ratio() and bpopular with the heuristic on and off."""
        # By default autojunk=True and the heuristic kicks in for a sequence
        # of length 200+
        seq1 = 'b' * 200
        seq2 = 'a' + 'b' * 200

        sm = difflib.SequenceMatcher(None, seq1, seq2)
        # With 'b' classed as popular, the two sequences no longer match.
        self.assertAlmostEqual(sm.ratio(), 0, places=3)
        self.assertEqual(sm.bpopular, {'b'})

        # Now turn the heuristic off
        sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
        self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
        self.assertEqual(sm.bpopular, set())
| 63 | |
class TestSFbugs(unittest.TestCase):
    """Regression tests for individual difflib bug reports."""

    def test_ratio_for_null_seqn(self):
        """All three ratio methods return 1 for two empty sequences."""
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        """Diffing two empty lists yields generators that are already done."""
        # Check fix for bug #979794
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_added_tab_hint(self):
        """Hint ('?') lines stay aligned when a leading tab is added."""
        # Check fix for bug #1488943
        removed = "\tI am a buggy"
        added = "\t\tI am a bug"
        diff = list(difflib.Differ().compare([removed], [added]))
        self.assertEqual(len(diff), 4)
        self.assertEqual("- " + removed, diff[0])

        # The "--" markers must sit under the two deleted trailing characters.
        # Only whitespace may pad the gap after the "? " prefix; whether that
        # padding is spaces or a copied tab is not pinned here.
        # NOTE(review): confirm the exact padding against the difflib
        # version under test if a literal comparison is wanted.
        hint = diff[1]
        self.assertTrue(hint.startswith("? "), hint)
        self.assertTrue(hint.endswith("--\n"), hint)
        self.assertEqual(hint[2:-3].strip(), "")
        self.assertEqual(len(hint) - 1, len(diff[0]))

        self.assertEqual("+ " + added, diff[2])
        self.assertEqual("? +\n", diff[3])
| 86 | |
# Input fixtures for TestSFpatches.test_html_diff (SF patch 914575): each
# patch914575_fromN / patch914575_toN pair is split into lines and diffed
# with difflib.HtmlDiff.
# NOTE(review): the bracketed tags in the *2 strings (e.g. from:[tt]
# to:[ssss]) look like they describe leading tabs/spaces, so whitespace
# inside these literals is presumably significant -- confirm against the
# repository copy before editing any of them.
Martin v. Löwis | e064b41 | 2004-08-29 16:34:40 +0000 | [diff] [blame] | 87 | patch914575_from1 = """ |
| 88 | 1. Beautiful is beTTer than ugly. |
| 89 | 2. Explicit is better than implicit. |
| 90 | 3. Simple is better than complex. |
| 91 | 4. Complex is better than complicated. |
| 92 | """ |
| 93 | |
| 94 | patch914575_to1 = """ |
| 95 | 1. Beautiful is better than ugly. |
| 96 | 3. Simple is better than complex. |
| 97 | 4. Complicated is better than complex. |
| 98 | 5. Flat is better than nested. |
| 99 | """ |
| 100 | |
| 101 | patch914575_from2 = """ |
| 102 | \t\tLine 1: preceeded by from:[tt] to:[ssss] |
| 103 | \t\tLine 2: preceeded by from:[sstt] to:[sssst] |
| 104 | \t \tLine 3: preceeded by from:[sstst] to:[ssssss] |
| 105 | Line 4: \thas from:[sst] to:[sss] after : |
| 106 | Line 5: has from:[t] to:[ss] at end\t |
| 107 | """ |
| 108 | |
| 109 | patch914575_to2 = """ |
| 110 | Line 1: preceeded by from:[tt] to:[ssss] |
| 111 | \tLine 2: preceeded by from:[sstt] to:[sssst] |
| 112 | Line 3: preceeded by from:[sstst] to:[ssssss] |
| 113 | Line 4: has from:[sst] to:[sss] after : |
Tim Peters | 48bd7f3 | 2004-08-29 22:38:38 +0000 | [diff] [blame] | 114 | Line 5: has from:[t] to:[ss] at end |
Martin v. Löwis | e064b41 | 2004-08-29 16:34:40 +0000 | [diff] [blame] | 115 | """ |
| 116 | |
| 117 | patch914575_from3 = """line 0 |
| 118 | 1234567890123456789012345689012345 |
| 119 | line 1 |
| 120 | line 2 |
| 121 | line 3 |
Tim Peters | 48bd7f3 | 2004-08-29 22:38:38 +0000 | [diff] [blame] | 122 | line 4 changed |
| 123 | line 5 changed |
| 124 | line 6 changed |
Martin v. Löwis | e064b41 | 2004-08-29 16:34:40 +0000 | [diff] [blame] | 125 | line 7 |
| 126 | line 8 subtracted |
| 127 | line 9 |
| 128 | 1234567890123456789012345689012345 |
| 129 | short line |
| 130 | just fits in!! |
| 131 | just fits in two lines yup!! |
| 132 | the end""" |
| 133 | |
| 134 | patch914575_to3 = """line 0 |
| 135 | 1234567890123456789012345689012345 |
| 136 | line 1 |
| 137 | line 2 added |
| 138 | line 3 |
Tim Peters | 48bd7f3 | 2004-08-29 22:38:38 +0000 | [diff] [blame] | 139 | line 4 chanGEd |
| 140 | line 5a chanGed |
| 141 | line 6a changEd |
Martin v. Löwis | e064b41 | 2004-08-29 16:34:40 +0000 | [diff] [blame] | 142 | line 7 |
| 143 | line 8 |
| 144 | line 9 |
| 145 | 1234567890 |
| 146 | another long line that needs to be wrapped |
| 147 | just fitS in!! |
| 148 | just fits in two lineS yup!! |
| 149 | the end""" |
| 150 | |
class TestSFpatches(unittest.TestCase):
    """Regression tests for SF patches applied to difflib."""

    def test_html_diff(self):
        """Compare HtmlDiff output with the stored HTML baseline.

        Builds one full HTML file plus a series of tables covering context
        and full diffs, identical and empty inputs, tab sizes and line
        wrapping, then compares the result with the contents of
        ``test_difflib_expect.html``.
        """
        # Check SF patch 914575 for generating HTML differences
        f1a = (patch914575_from1 + '123\n' * 10) * 3
        t1a = (patch914575_to1 + '123\n' * 10) * 3
        # The "b" variants put ten extra unchanged lines before the first diff.
        f1b = '456\n' * 10 + f1a
        t1b = '456\n' * 10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        # f3/t3 stay unsplit: they are split below both with and without
        # keeping line endings.
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        full = i.make_file(f1a, t1a, 'from', 'to', context=False, numlines=5)
        tables = '\n'.join(
            [
                '<h2>Context (first diff within numlines=5(default))</h2>',
                i.make_table(f1a, t1a, 'from', 'to', context=True),
                '<h2>Context (first diff after numlines=5(default))</h2>',
                i.make_table(f1b, t1b, 'from', 'to', context=True),
                '<h2>Context (numlines=6)</h2>',
                i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=6),
                '<h2>Context (numlines=0)</h2>',
                i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=0),
                '<h2>Same Context</h2>',
                i.make_table(f1a, f1a, 'from', 'to', context=True),
                '<h2>Same Full</h2>',
                i.make_table(f1a, f1a, 'from', 'to', context=False),
                '<h2>Empty Context</h2>',
                i.make_table([], [], 'from', 'to', context=True),
                '<h2>Empty Full</h2>',
                i.make_table([], [], 'from', 'to', context=False),
                '<h2>tabsize=2</h2>',
                j.make_table(f2, t2),
                '<h2>tabsize=default</h2>',
                i.make_table(f2, t2),
                '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
                k.make_table(f3.splitlines(), t3.splitlines(),
                             context=True, numlines=0),
                '<h2>wrapcolumn=14,splitlines()</h2>',
                k.make_table(f3.splitlines(), t3.splitlines()),
                '<h2>wrapcolumn=14,splitlines(True)</h2>',
                k.make_table(f3.splitlines(True), t3.splitlines(True)),
            ])
        # Insert the extra tables just before the closing body tag.
        actual = full.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w') as fp:
        #    fp.write(actual)

        with open(findfile('test_difflib_expect.html')) as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        """get_opcodes() copes with sequences twice the recursion limit long."""
        # Check if the problem described in patch #1413711 exists.
        limit = sys.getrecursionlimit()
        # Odd indexes are identical in both lists; even ones differ (A vs B).
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit * 2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit * 2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()
| 217 | |
class TestOutputFormat(unittest.TestCase):
    """Checks on the header and range text of unified and context diffs."""

    def test_tab_delimiter(self):
        """File name and file date in the header are separated by a tab."""
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        """Without file dates the header lines carry no trailing tab."""
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        """_format_range_unified renders (start, stop) as the spec requires."""
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number> if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines> otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty
        #   range starts the file.
        fmt = difflib._format_range_unified
        expected = [
            ((3, 3), '3,0'),
            ((3, 4), '4'),
            ((3, 5), '4,2'),
            ((3, 6), '4,3'),
            ((0, 0), '0,0'),
        ]
        for (start, stop), text in expected:
            self.assertEqual(fmt(start, stop), text,
                             'fmt(%d, %d)' % (start, stop))

    def test_range_format_context(self):
        """_format_range_context renders (start, stop) as the spec requires."""
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #     "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #     "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        expected = [
            ((3, 3), '3'),
            ((3, 4), '4'),
            ((3, 5), '4,5'),
            ((3, 6), '4,6'),
            ((0, 0), '0'),
        ]
        for (start, stop), text in expected:
            self.assertEqual(fmt(start, stop), text,
                             'fmt(%d, %d)' % (start, stop))
| 280 | |
def test_main():
    """Run every test class in this file together with difflib's doctests."""
    # NOTE(review): presumably resets HtmlDiff's prefix counter so that the
    # generated HTML is reproducible -- confirm in difflib.
    difflib.HtmlDiff._default_prefix = 0
    doctest_suite = doctest.DocTestSuite(difflib)
    targets = (
        TestWithAscii,
        TestAutojunk,
        TestSFpatches,
        TestSFbugs,
        TestOutputFormat,
        doctest_suite,
    )
    run_unittest(*targets)


if __name__ == '__main__':
    test_main()