Barry Warsaw | 04f357c | 2002-07-23 19:04:11 +0000 | [diff] [blame] | 1 | import difflib |
Martin v. Löwis | e064b41 | 2004-08-29 16:34:40 +0000 | [diff] [blame] | 2 | from test.test_support import run_unittest, findfile |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 3 | import unittest |
Raymond Hettinger | 43d790c | 2003-07-16 04:34:56 +0000 | [diff] [blame] | 4 | import doctest |
Gustavo Niemeyer | 54814881 | 2006-01-31 18:34:13 +0000 | [diff] [blame] | 5 | import sys |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 6 | |
Neal Norwitz | e7dfe21 | 2003-07-01 14:59:46 +0000 | [diff] [blame] | 7 | |
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher ratio and opcodes for one-character edits of ASCII strings."""

    def test_one_insert(self):
        """A single inserted character, first at the front and then mid-sequence."""
        # Insertion at the very start.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('insert', 0, 0, 0, 1),
                          ('equal', 0, 100, 1, 101)])

        # Insertion in the middle splits the match into two 'equal' runs.
        sm = difflib.SequenceMatcher(None, 'b' * 100,
                                     'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 50, 0, 50),
                          ('insert', 50, 50, 50, 51),
                          ('equal', 50, 100, 51, 101)])

    def test_one_delete(self):
        """A single character removed from the middle of the first sequence."""
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40,
                                     'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 40, 0, 40),
                          ('delete', 40, 41, 40, 40),
                          ('equal', 41, 81, 40, 80)])
| 29 | |
| 30 | |
class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        """Compare the similarity ratio with the autojunk heuristic on and off."""
        # By default autojunk=True and the heuristic kicks in for a sequence
        # of length 200+
        seq1 = 'b' * 200
        seq2 = 'a' + 'b' * 200

        with_heuristic = difflib.SequenceMatcher(None, seq1, seq2)
        self.assertAlmostEqual(with_heuristic.ratio(), 0, places=3)

        # Now turn the heuristic off
        without_heuristic = difflib.SequenceMatcher(None, seq1, seq2,
                                                    autojunk=False)
        self.assertAlmostEqual(without_heuristic.ratio(), 0.9975, places=3)
| 45 | |
| 46 | |
class TestSFbugs(unittest.TestCase):
    """Regression tests tied to individually reported difflib bugs."""

    def test_ratio_for_null_seqn(self):
        """All three ratio flavours report 1 for two empty sequences."""
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        """Diff generators over two empty inputs are exhausted immediately."""
        # Check fix for bug #979794
        # The builtin next() is used instead of the generator's .next
        # attribute so the check does not depend on the Python 2 spelling.
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_matching_blocks_cache(self):
        """A repeated get_matching_blocks() call still yields blocks with .size."""
        # Issue #21635
        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
        s.get_matching_blocks()  # first call; only the repeat call is inspected
        second = s.get_matching_blocks()
        self.assertEqual(second[0].size, 2)
        self.assertEqual(second[1].size, 2)
        self.assertEqual(second[2].size, 0)

    def test_added_tab_hint(self):
        """Differ emits '?' hint lines for a tab-indented pair of lines."""
        # Check fix for bug #1488943
        diff = list(difflib.Differ().compare(["\tI am a buggy"],
                                             ["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])

        # The hint under the '-' line marks the two deleted trailing
        # characters.  Its padding is asserted structurally (same width as the
        # line above, whitespace only between '?' and the markers) rather than
        # as a whitespace-exact literal, which is easily corrupted and
        # NOTE(review): appears to differ between difflib versions -- confirm.
        hint = diff[1]
        self.assertEqual(len(hint), len(diff[0]) + 1)  # +1 for the newline
        self.assertTrue(hint.startswith("? "))
        self.assertTrue(hint.endswith("--\n"))
        self.assertTrue(hint[1:-3].isspace())

        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])
| 78 | |
# Fixture text for the HTML diff test (SF patch 914575).
#
# NOTE(review): whitespace inside these literals is significant because the
# rendered HTML is compared against a stored baseline file.  The from2/to2
# lines describe their own whitespace in brackets (presumably s = space,
# t = tab); their leading and inner whitespace below is written to match those
# legends.  Trailing whitespace on the last to2 line, and any runs of spaces
# inside the from1/to1/from3/to3 lines, could not be confirmed from the view
# this was recovered from -- verify against the repository copy before
# re-baselining.

patch914575_from1 = """
1. Beautiful is beTTer than ugly.
2. Explicit is better than implicit.
3. Simple is better than complex.
4. Complex is better than complicated.
"""

patch914575_to1 = """
1. Beautiful is better than ugly.
3. Simple is better than complex.
4. Complicated is better than complex.
5. Flat is better than nested.
"""

patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
  \t\tLine 2: preceeded by from:[sstt] to:[sssst]
  \t \tLine 3: preceeded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
    Line 1: preceeded by from:[tt] to:[ssss]
    \tLine 2: preceeded by from:[sstt] to:[sssst]
      Line 3: preceeded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4 changed
line 5 changed
line 6 changed
line 7
line 8 subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2 added
line 3
line 4 chanGEd
line 5a chanGed
line 6a changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
| 142 | |
class TestSFpatches(unittest.TestCase):
    """Tests tied to contributed difflib patches."""

    def test_html_diff(self):
        """Render a full HTML diff page plus extra tables and compare to a baseline file."""
        # Check SF patch 914575 for generating HTML differences
        f1a = (patch914575_from1 + '123\n' * 10) * 3
        t1a = (patch914575_to1 + '123\n' * 10) * 3
        # The "b" variants push the first difference further down the input.
        f1b = '456\n' * 10 + f1a
        t1b = '456\n' * 10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3

        default_diff = difflib.HtmlDiff()
        tab2_diff = difflib.HtmlDiff(tabsize=2)
        wrap14_diff = difflib.HtmlDiff(wrapcolumn=14)

        full = default_diff.make_file(f1a, t1a, 'from', 'to',
                                      context=False, numlines=5)
        tables = '\n'.join([
            '<h2>Context (first diff within numlines=5(default))</h2>',
            default_diff.make_table(f1a, t1a, 'from', 'to', context=True),
            '<h2>Context (first diff after numlines=5(default))</h2>',
            default_diff.make_table(f1b, t1b, 'from', 'to', context=True),
            '<h2>Context (numlines=6)</h2>',
            default_diff.make_table(f1a, t1a, 'from', 'to',
                                    context=True, numlines=6),
            '<h2>Context (numlines=0)</h2>',
            default_diff.make_table(f1a, t1a, 'from', 'to',
                                    context=True, numlines=0),
            '<h2>Same Context</h2>',
            default_diff.make_table(f1a, f1a, 'from', 'to', context=True),
            '<h2>Same Full</h2>',
            default_diff.make_table(f1a, f1a, 'from', 'to', context=False),
            '<h2>Empty Context</h2>',
            default_diff.make_table([], [], 'from', 'to', context=True),
            '<h2>Empty Full</h2>',
            default_diff.make_table([], [], 'from', 'to', context=False),
            '<h2>tabsize=2</h2>',
            tab2_diff.make_table(f2, t2),
            '<h2>tabsize=default</h2>',
            default_diff.make_table(f2, t2),
            '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
            wrap14_diff.make_table(f3.splitlines(), t3.splitlines(),
                                   context=True, numlines=0),
            '<h2>wrapcolumn=14,splitlines()</h2>',
            wrap14_diff.make_table(f3.splitlines(), t3.splitlines()),
            '<h2>wrapcolumn=14,splitlines(True)</h2>',
            wrap14_diff.make_table(f3.splitlines(True), t3.splitlines(True)),
        ])
        # Splice the extra tables into the full page just before </body>.
        actual = full.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w') as fp:
        #    fp.write(actual)

        with open(findfile('test_difflib_expect.html')) as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        """get_opcodes() completes on inputs twice as long as the recursion limit."""
        # Check if the problem described in patch #1413711 exists.
        limit = sys.getrecursionlimit()
        # Odd positions carry identical "K:" items in both lists; even
        # positions carry "V:A:" vs "V:B:" items that never match.
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit * 2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit * 2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()
| 208 | |
| 209 | |
class TestOutputFormat(unittest.TestCase):
    """Checks on the header and range text of unified and context diffs."""

    def test_tab_delimiter(self):
        """File name and date in each header line are separated by a tab."""
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
            "--- Original\t2005-01-26 23:30:50",
            "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
            "*** Original\t2005-01-26 23:30:50",
            "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        """Without file dates the header lines end right after the file name."""
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        """Check the text difflib._format_range_unified produces for small ranges."""
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number>  if the range contains exactly
        #     one line, and:
        #     "%1d,%1d", <beginning line number>, <number of lines>  otherwise.
        #   If a range is empty, its beginning line number shall be the number
        #   of the line just before the range, or 0 if the empty range starts
        #   the file.
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3, 3), '3,0')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,2')
        self.assertEqual(fmt(3, 6), '4,3')
        self.assertEqual(fmt(0, 0), '0,0')

    def test_range_format_context(self):
        """Check the text difflib._format_range_context produces for small ranges."""
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #     "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #     "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3, 3), '3')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,5')
        self.assertEqual(fmt(3, 6), '4,6')
        self.assertEqual(fmt(0, 0), '0')
| 271 | |
R. David Murray | 1a14d3d | 2010-04-12 16:35:19 +0000 | [diff] [blame] | 272 | |
def test_main():
    """Run this module's TestCase classes together with difflib's doctests."""
    # NOTE(review): presumably pins the HtmlDiff id prefix so generated HTML
    # is reproducible for the baseline comparison -- confirm against difflib.
    difflib.HtmlDiff._default_prefix = 0
    doctests = doctest.DocTestSuite(difflib)
    run_unittest(
        TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
        TestOutputFormat, doctests)


if __name__ == '__main__':
    test_main()