Barry Warsaw | 04f357c | 2002-07-23 19:04:11 +0000 | [diff] [blame] | 1 | import difflib |
Martin v. Löwis | e064b41 | 2004-08-29 16:34:40 +0000 | [diff] [blame] | 2 | from test.test_support import run_unittest, findfile |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 3 | import unittest |
Raymond Hettinger | 43d790c | 2003-07-16 04:34:56 +0000 | [diff] [blame] | 4 | import doctest |
Gustavo Niemeyer | 54814881 | 2006-01-31 18:34:13 +0000 | [diff] [blame] | 5 | import sys |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 6 | |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 7 | |
class TestWithAscii(unittest.TestCase):
    """Exercise SequenceMatcher on ASCII strings that are one edit apart."""

    def _assert_diff(self, source, target, ratio, opcodes):
        # Shared check: the similarity ratio must agree to three decimal
        # places and the opcode list must match exactly.
        matcher = difflib.SequenceMatcher(None, source, target)
        self.assertAlmostEqual(matcher.ratio(), ratio, places=3)
        self.assertEqual(list(matcher.get_opcodes()), opcodes)

    def test_one_insert(self):
        run = 'b' * 100
        half = 'b' * 50

        # One extra character at the very front.
        self._assert_diff(
            run, 'a' + run, 0.995,
            [('insert', 0, 0, 0, 1),
             ('equal', 0, 100, 1, 101)])

        # One extra character in the middle.
        self._assert_diff(
            run, half + 'a' + half, 0.995,
            [('equal', 0, 50, 0, 50),
             ('insert', 50, 50, 50, 51),
             ('equal', 50, 100, 51, 101)])

    def test_one_delete(self):
        head = 'a' * 40
        tail = 'b' * 40

        # The single 'c' between the two runs disappears in the target.
        self._assert_diff(
            head + 'c' + tail, head + tail, 0.994,
            [('equal', 0, 40, 0, 40),
             ('delete', 40, 41, 40, 40),
             ('equal', 41, 81, 40, 80)])
| 29 | |
| 30 | |
class TestAutojunk(unittest.TestCase):
    """Cover the autojunk parameter that SequenceMatcher gained in 2.7."""

    def test_one_insert_homogenous_sequence(self):
        # The second sequence is 200+ items long, which is where the
        # autojunk heuristic (on by default) kicks in.
        base = 'b' * 200
        grown = 'a' + base

        with_heuristic = difflib.SequenceMatcher(None, base, grown)
        without_heuristic = difflib.SequenceMatcher(None, base, grown,
                                                    autojunk=False)

        # Default behaviour: the heuristic makes the ratio collapse to 0.
        self.assertAlmostEqual(with_heuristic.ratio(), 0, places=3)

        # Heuristic turned off: the near-identical inputs score close to 1.
        self.assertAlmostEqual(without_heuristic.ratio(), 0.9975, places=3)
| 45 | |
| 46 | |
| 47 | class TestSFbugs(unittest.TestCase): |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 48 | def test_ratio_for_null_seqn(self): |
| 49 | # Check clearing of SF bug 763023 |
| 50 | s = difflib.SequenceMatcher(None, [], []) |
| 51 | self.assertEqual(s.ratio(), 1) |
| 52 | self.assertEqual(s.quick_ratio(), 1) |
| 53 | self.assertEqual(s.real_quick_ratio(), 1) |
| 54 | |
Brett Cannon | d2c5b4b | 2004-07-10 23:54:07 +0000 | [diff] [blame] | 55 | def test_comparing_empty_lists(self): |
| 56 | # Check fix for bug #979794 |
| 57 | group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes() |
| 58 | self.assertRaises(StopIteration, group_gen.next) |
| 59 | diff_gen = difflib.unified_diff([], []) |
| 60 | self.assertRaises(StopIteration, diff_gen.next) |
| 61 | |
Senthil Kumaran | 5c456e6 | 2009-11-23 18:41:31 +0000 | [diff] [blame] | 62 | def test_added_tab_hint(self): |
| 63 | # Check fix for bug #1488943 |
| 64 | diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"])) |
| 65 | self.assertEqual("- \tI am a buggy", diff[0]) |
| 66 | self.assertEqual("? --\n", diff[1]) |
| 67 | self.assertEqual("+ \t\tI am a bug", diff[2]) |
| 68 | self.assertEqual("? +\n", diff[3]) |
| 69 | |
# Text fixtures consumed by TestSFpatches.test_html_diff (SF patch 914575).
# NOTE(review): these strings are whitespace-sensitive, and runs of spaces
# look collapsed in this copy -- verify them against a pristine checkout
# before re-baselining the expected HTML.

# Pair 1: short numbered lines with a few edits.  The test repeats each
# text three times, padded with '123' filler lines, before diffing.
patch914575_from1 = """
1. Beautiful is beTTer than ugly.
2. Explicit is better than implicit.
3. Simple is better than complex.
4. Complex is better than complicated.
"""

patch914575_to1 = """
1. Beautiful is better than ugly.
3. Simple is better than complex.
4. Complicated is better than complex.
5. Flat is better than nested.
"""

# Pair 2: lines mixing tabs and spaces, rendered by the test with both
# tabsize=2 and the default tab size.  The from:[...] / to:[...] notes
# presumably spell out each side's whitespace (t = tab, s = space) --
# confirm against the pristine file.
patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
\t\tLine 2: preceeded by from:[sstt] to:[sssst]
\t \tLine 3: preceeded by from:[sstst] to:[ssssss]
Line 4: \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
Line 1: preceeded by from:[tt] to:[ssss]
\tLine 2: preceeded by from:[sstt] to:[sssst]
Line 3: preceeded by from:[sstst] to:[ssssss]
Line 4: has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

# Pair 3: a mix of short and long lines, rendered by the test with
# wrapcolumn=14.  Neither text starts or ends with a newline.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4 changed
line 5 changed
line 6 changed
line 7
line 8 subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2 added
line 3
line 4 chanGEd
line 5a chanGed
line 6a changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
| 133 | |
class TestSFpatches(unittest.TestCase):
    """Checks that accompany patches from the SourceForge tracker."""

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        filler = '123\n' * 10
        preamble = '456\n' * 10
        from_text = (patch914575_from1 + filler) * 3
        to_text = (patch914575_to1 + filler) * 3

        f1a = from_text.splitlines()
        t1a = to_text.splitlines()
        # Same texts with ten extra lines in front of them.
        f1b = (preamble + from_text).splitlines()
        t1b = (preamble + to_text).splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3.splitlines()
        t3 = patch914575_to3.splitlines()
        f3_keepends = patch914575_from3.splitlines(True)
        t3_keepends = patch914575_to3.splitlines(True)

        plain = difflib.HtmlDiff()
        narrow_tabs = difflib.HtmlDiff(tabsize=2)
        wrapped = difflib.HtmlDiff(wrapcolumn=14)

        # The full page is produced first, then one table per heading, in
        # exactly this order.
        # NOTE(review): the order is kept deliberately; the generated markup
        # presumably depends on how many tables were made before -- confirm
        # against HtmlDiff before reordering anything here.
        full = plain.make_file(f1a, t1a, 'from', 'to',
                               context=False, numlines=5)
        sections = [
            ('<h2>Context (first diff within numlines=5(default))</h2>',
             plain.make_table(f1a, t1a, 'from', 'to', context=True)),
            ('<h2>Context (first diff after numlines=5(default))</h2>',
             plain.make_table(f1b, t1b, 'from', 'to', context=True)),
            ('<h2>Context (numlines=6)</h2>',
             plain.make_table(f1a, t1a, 'from', 'to',
                              context=True, numlines=6)),
            ('<h2>Context (numlines=0)</h2>',
             plain.make_table(f1a, t1a, 'from', 'to',
                              context=True, numlines=0)),
            ('<h2>Same Context</h2>',
             plain.make_table(f1a, f1a, 'from', 'to', context=True)),
            ('<h2>Same Full</h2>',
             plain.make_table(f1a, f1a, 'from', 'to', context=False)),
            ('<h2>Empty Context</h2>',
             plain.make_table([], [], 'from', 'to', context=True)),
            ('<h2>Empty Full</h2>',
             plain.make_table([], [], 'from', 'to', context=False)),
            ('<h2>tabsize=2</h2>',
             narrow_tabs.make_table(f2, t2)),
            ('<h2>tabsize=default</h2>',
             plain.make_table(f2, t2)),
            ('<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             wrapped.make_table(f3, t3, context=True, numlines=0)),
            ('<h2>wrapcolumn=14,splitlines()</h2>',
             wrapped.make_table(f3, t3)),
            ('<h2>wrapcolumn=14,splitlines(True)</h2>',
             wrapped.make_table(f3_keepends, t3_keepends)),
        ]

        # Interleave heading, table, heading, table, ... and splice the
        # result in just before the closing body tag of the full page.
        pieces = []
        for heading, table in sections:
            pieces.append(heading)
            pieces.append(table)
        tables = '\n'.join(pieces)
        actual = full.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w') as fp:
        #    fp.write(actual)

        with open(findfile('test_difflib_expect.html')) as fp:
            expected = fp.read()
        self.assertEqual(actual, expected)

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        # Both lists are twice as long as the interpreter's recursion limit;
        # odd positions carry identical keys, even positions differ.  The
        # test passes as long as computing the opcodes does not raise.
        size = sys.getrecursionlimit() * 2
        old = []
        new = []
        for n in range(size):
            if n % 2:
                old.append("K:%d" % n)
                new.append("K:%d" % n)
            else:
                old.append("V:A:%d" % n)
                new.append("V:B:%d" % n)
        difflib.SequenceMatcher(None, old, new).get_opcodes()
| 199 | |
| 200 | |
class TestOutputFormat(unittest.TestCase):
    """Check header lines and range fields of unified and context diffs."""

    def test_tab_delimiter(self):
        # When file dates are supplied, each header line is the file name,
        # a tab, and then the date.
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                         "--- Original\t2005-01-26 23:30:50",
                         "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                         "*** Original\t2005-01-26 23:30:50",
                         "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        # Without file dates the header lines must end right after the
        # file name, with no dangling tab.
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number>  if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines>  otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty
        #   range starts the file.
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3, 3), '3,0')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,2')
        self.assertEqual(fmt(3, 6), '4,3')
        self.assertEqual(fmt(0, 0), '0,0')

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #     "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #     "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3, 3), '3')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,5')
        self.assertEqual(fmt(3, 6), '4,6')
        self.assertEqual(fmt(0, 0), '0')
| 262 | |
R. David Murray | 1a14d3d | 2010-04-12 16:35:19 +0000 | [diff] [blame] | 263 | |
def test_main():
    """Run every test class in this module together with difflib's doctests."""
    # Reset HtmlDiff's class-level prefix counter before any test runs.
    # NOTE(review): presumably this keeps the generated HTML in step with
    # the stored baseline file -- confirm against difflib.HtmlDiff.
    difflib.HtmlDiff._default_prefix = 0

    suites = [TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
              TestOutputFormat]
    # The examples embedded in difflib's own docstrings run last.
    suites.append(doctest.DocTestSuite(difflib))
    run_unittest(*suites)
Raymond Hettinger | 43d790c | 2003-07-16 04:34:56 +0000 | [diff] [blame] | 270 | |
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()