blob: 0ba8f0e05bcf471668cbe4994a6d94fb8114a0ed [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001import difflib
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test.support import run_unittest, findfile
Neal Norwitze7dfe212003-07-01 14:59:46 +00003import unittest
Raymond Hettinger43d790c2003-07-16 04:34:56 +00004import doctest
Gustavo Niemeyer548148812006-01-31 18:34:13 +00005import sys
Neal Norwitze7dfe212003-07-01 14:59:46 +00006
Neal Norwitze7dfe212003-07-01 14:59:46 +00007
Terry Reedy99f96372010-11-25 06:12:34 +00008class TestWithAscii(unittest.TestCase):
9 def test_one_insert(self):
10 sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
11 self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
12 self.assertEqual(list(sm.get_opcodes()),
13 [ ('insert', 0, 0, 0, 1),
14 ('equal', 0, 100, 1, 101)])
Terry Reedy17a59252010-12-15 20:18:10 +000015 self.assertEqual(sm.bpopular, set())
Terry Reedy99f96372010-11-25 06:12:34 +000016 sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
17 self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
18 self.assertEqual(list(sm.get_opcodes()),
19 [ ('equal', 0, 50, 0, 50),
20 ('insert', 50, 50, 50, 51),
21 ('equal', 50, 100, 51, 101)])
Terry Reedy17a59252010-12-15 20:18:10 +000022 self.assertEqual(sm.bpopular, set())
Terry Reedy99f96372010-11-25 06:12:34 +000023
24 def test_one_delete(self):
25 sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
26 self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
27 self.assertEqual(list(sm.get_opcodes()),
28 [ ('equal', 0, 40, 0, 40),
29 ('delete', 40, 41, 40, 40),
30 ('equal', 41, 81, 40, 80)])
31
Terry Reedy17a59252010-12-15 20:18:10 +000032 def test_bjunk(self):
33 sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
34 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
35 self.assertEqual(sm.bjunk, set())
36
37 sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
38 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
39 self.assertEqual(sm.bjunk, {' '})
40
41 sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
42 a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
43 self.assertEqual(sm.bjunk, {' ', 'b'})
44
Terry Reedy99f96372010-11-25 06:12:34 +000045
46class TestAutojunk(unittest.TestCase):
47 """Tests for the autojunk parameter added in 2.7"""
48 def test_one_insert_homogenous_sequence(self):
49 # By default autojunk=True and the heuristic kicks in for a sequence
50 # of length 200+
51 seq1 = 'b' * 200
52 seq2 = 'a' + 'b' * 200
53
54 sm = difflib.SequenceMatcher(None, seq1, seq2)
55 self.assertAlmostEqual(sm.ratio(), 0, places=3)
Terry Reedy17a59252010-12-15 20:18:10 +000056 self.assertEqual(sm.bpopular, {'b'})
Terry Reedy99f96372010-11-25 06:12:34 +000057
58 # Now turn the heuristic off
59 sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
60 self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
Terry Reedy17a59252010-12-15 20:18:10 +000061 self.assertEqual(sm.bpopular, set())
Terry Reedy99f96372010-11-25 06:12:34 +000062
63
64class TestSFbugs(unittest.TestCase):
Neal Norwitze7dfe212003-07-01 14:59:46 +000065 def test_ratio_for_null_seqn(self):
66 # Check clearing of SF bug 763023
67 s = difflib.SequenceMatcher(None, [], [])
68 self.assertEqual(s.ratio(), 1)
69 self.assertEqual(s.quick_ratio(), 1)
70 self.assertEqual(s.real_quick_ratio(), 1)
71
Brett Cannond2c5b4b2004-07-10 23:54:07 +000072 def test_comparing_empty_lists(self):
73 # Check fix for bug #979794
74 group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
Georg Brandla18af4e2007-04-21 15:47:16 +000075 self.assertRaises(StopIteration, next, group_gen)
Brett Cannond2c5b4b2004-07-10 23:54:07 +000076 diff_gen = difflib.unified_diff([], [])
Georg Brandla18af4e2007-04-21 15:47:16 +000077 self.assertRaises(StopIteration, next, diff_gen)
Brett Cannond2c5b4b2004-07-10 23:54:07 +000078
Raymond Hettingerfabefc32014-06-21 11:57:36 -070079 def test_matching_blocks_cache(self):
80 # Issue #21635
81 s = difflib.SequenceMatcher(None, "abxcd", "abcd")
82 first = s.get_matching_blocks()
83 second = s.get_matching_blocks()
84 self.assertEqual(second[0].size, 2)
85 self.assertEqual(second[1].size, 2)
86 self.assertEqual(second[2].size, 0)
87
Senthil Kumaran758025c2009-11-23 19:02:52 +000088 def test_added_tab_hint(self):
89 # Check fix for bug #1488943
90 diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"]))
91 self.assertEqual("- \tI am a buggy", diff[0])
92 self.assertEqual("? --\n", diff[1])
93 self.assertEqual("+ \t\tI am a bug", diff[2])
94 self.assertEqual("? +\n", diff[3])
95
Martin v. Löwise064b412004-08-29 16:34:40 +000096patch914575_from1 = """
97 1. Beautiful is beTTer than ugly.
98 2. Explicit is better than implicit.
99 3. Simple is better than complex.
100 4. Complex is better than complicated.
101"""
102
103patch914575_to1 = """
104 1. Beautiful is better than ugly.
105 3. Simple is better than complex.
106 4. Complicated is better than complex.
107 5. Flat is better than nested.
108"""
109
110patch914575_from2 = """
111\t\tLine 1: preceeded by from:[tt] to:[ssss]
112 \t\tLine 2: preceeded by from:[sstt] to:[sssst]
113 \t \tLine 3: preceeded by from:[sstst] to:[ssssss]
114Line 4: \thas from:[sst] to:[sss] after :
115Line 5: has from:[t] to:[ss] at end\t
116"""
117
118patch914575_to2 = """
119 Line 1: preceeded by from:[tt] to:[ssss]
120 \tLine 2: preceeded by from:[sstt] to:[sssst]
121 Line 3: preceeded by from:[sstst] to:[ssssss]
122Line 4: has from:[sst] to:[sss] after :
Tim Peters48bd7f32004-08-29 22:38:38 +0000123Line 5: has from:[t] to:[ss] at end
Martin v. Löwise064b412004-08-29 16:34:40 +0000124"""
125
126patch914575_from3 = """line 0
1271234567890123456789012345689012345
128line 1
129line 2
130line 3
Tim Peters48bd7f32004-08-29 22:38:38 +0000131line 4 changed
132line 5 changed
133line 6 changed
Martin v. Löwise064b412004-08-29 16:34:40 +0000134line 7
135line 8 subtracted
136line 9
1371234567890123456789012345689012345
138short line
139just fits in!!
140just fits in two lines yup!!
141the end"""
142
143patch914575_to3 = """line 0
1441234567890123456789012345689012345
145line 1
146line 2 added
147line 3
Tim Peters48bd7f32004-08-29 22:38:38 +0000148line 4 chanGEd
149line 5a chanGed
150line 6a changEd
Martin v. Löwise064b412004-08-29 16:34:40 +0000151line 7
152line 8
153line 9
1541234567890
155another long line that needs to be wrapped
156just fitS in!!
157just fits in two lineS yup!!
158the end"""
159
160class TestSFpatches(unittest.TestCase):
161
162 def test_html_diff(self):
163 # Check SF patch 914575 for generating HTML differences
164 f1a = ((patch914575_from1 + '123\n'*10)*3)
165 t1a = (patch914575_to1 + '123\n'*10)*3
166 f1b = '456\n'*10 + f1a
167 t1b = '456\n'*10 + t1a
168 f1a = f1a.splitlines()
169 t1a = t1a.splitlines()
170 f1b = f1b.splitlines()
171 t1b = t1b.splitlines()
172 f2 = patch914575_from2.splitlines()
173 t2 = patch914575_to2.splitlines()
174 f3 = patch914575_from3
175 t3 = patch914575_to3
176 i = difflib.HtmlDiff()
177 j = difflib.HtmlDiff(tabsize=2)
178 k = difflib.HtmlDiff(wrapcolumn=14)
Tim Peters48bd7f32004-08-29 22:38:38 +0000179
Martin v. Löwise064b412004-08-29 16:34:40 +0000180 full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
181 tables = '\n'.join(
182 [
Tim Peters48bd7f32004-08-29 22:38:38 +0000183 '<h2>Context (first diff within numlines=5(default))</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000184 i.make_table(f1a,t1a,'from','to',context=True),
Tim Peters48bd7f32004-08-29 22:38:38 +0000185 '<h2>Context (first diff after numlines=5(default))</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000186 i.make_table(f1b,t1b,'from','to',context=True),
Tim Peters48bd7f32004-08-29 22:38:38 +0000187 '<h2>Context (numlines=6)</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000188 i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
Tim Peters48bd7f32004-08-29 22:38:38 +0000189 '<h2>Context (numlines=0)</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000190 i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
Tim Peters48bd7f32004-08-29 22:38:38 +0000191 '<h2>Same Context</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000192 i.make_table(f1a,f1a,'from','to',context=True),
Tim Peters48bd7f32004-08-29 22:38:38 +0000193 '<h2>Same Full</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000194 i.make_table(f1a,f1a,'from','to',context=False),
195 '<h2>Empty Context</h2>',
196 i.make_table([],[],'from','to',context=True),
197 '<h2>Empty Full</h2>',
198 i.make_table([],[],'from','to',context=False),
199 '<h2>tabsize=2</h2>',
200 j.make_table(f2,t2),
201 '<h2>tabsize=default</h2>',
202 i.make_table(f2,t2),
203 '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
204 k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
205 '<h2>wrapcolumn=14,splitlines()</h2>',
206 k.make_table(f3.splitlines(),t3.splitlines()),
207 '<h2>wrapcolumn=14,splitlines(True)</h2>',
208 k.make_table(f3.splitlines(True),t3.splitlines(True)),
209 ])
210 actual = full.replace('</body>','\n%s\n</body>' % tables)
Tim Peters48bd7f32004-08-29 22:38:38 +0000211
Philip Jenveya27c5bd2009-05-28 06:09:08 +0000212 # temporarily uncomment next two lines to baseline this test
213 #with open('test_difflib_expect.html','w') as fp:
214 # fp.write(actual)
Tim Peters48bd7f32004-08-29 22:38:38 +0000215
Philip Jenveya27c5bd2009-05-28 06:09:08 +0000216 with open(findfile('test_difflib_expect.html')) as fp:
217 self.assertEqual(actual, fp.read())
Martin v. Löwise064b412004-08-29 16:34:40 +0000218
Gustavo Niemeyer548148812006-01-31 18:34:13 +0000219 def test_recursion_limit(self):
220 # Check if the problem described in patch #1413711 exists.
221 limit = sys.getrecursionlimit()
222 old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)]
223 new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
224 difflib.SequenceMatcher(None, old, new).get_opcodes()
225
226
R. David Murrayb2416e52010-04-12 16:58:02 +0000227class TestOutputFormat(unittest.TestCase):
228 def test_tab_delimiter(self):
229 args = ['one', 'two', 'Original', 'Current',
230 '2005-01-26 23:30:50', '2010-04-02 10:20:52']
231 ud = difflib.unified_diff(*args, lineterm='')
232 self.assertEqual(list(ud)[0:2], [
233 "--- Original\t2005-01-26 23:30:50",
234 "+++ Current\t2010-04-02 10:20:52"])
235 cd = difflib.context_diff(*args, lineterm='')
236 self.assertEqual(list(cd)[0:2], [
237 "*** Original\t2005-01-26 23:30:50",
238 "--- Current\t2010-04-02 10:20:52"])
239
240 def test_no_trailing_tab_on_empty_filedate(self):
241 args = ['one', 'two', 'Original', 'Current']
242 ud = difflib.unified_diff(*args, lineterm='')
243 self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])
244
245 cd = difflib.context_diff(*args, lineterm='')
246 self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])
247
Raymond Hettinger9180deb2011-04-12 15:25:30 -0700248 def test_range_format_unified(self):
Raymond Hettinger49353d02011-04-11 12:40:58 -0700249 # Per the diff spec at http://www.unix.org/single_unix_specification/
250 spec = '''\
251 Each <range> field shall be of the form:
252 %1d", <beginning line number> if the range contains exactly one line,
253 and:
254 "%1d,%1d", <beginning line number>, <number of lines> otherwise.
255 If a range is empty, its beginning line number shall be the number of
256 the line just before the range, or 0 if the empty range starts the file.
257 '''
Raymond Hettinger9180deb2011-04-12 15:25:30 -0700258 fmt = difflib._format_range_unified
Raymond Hettinger49353d02011-04-11 12:40:58 -0700259 self.assertEqual(fmt(3,3), '3,0')
260 self.assertEqual(fmt(3,4), '4')
261 self.assertEqual(fmt(3,5), '4,2')
262 self.assertEqual(fmt(3,6), '4,3')
263 self.assertEqual(fmt(0,0), '0,0')
R. David Murrayb2416e52010-04-12 16:58:02 +0000264
Raymond Hettinger9180deb2011-04-12 15:25:30 -0700265 def test_range_format_context(self):
266 # Per the diff spec at http://www.unix.org/single_unix_specification/
267 spec = '''\
268 The range of lines in file1 shall be written in the following format
269 if the range contains two or more lines:
270 "*** %d,%d ****\n", <beginning line number>, <ending line number>
271 and the following format otherwise:
272 "*** %d ****\n", <ending line number>
273 The ending line number of an empty range shall be the number of the preceding line,
274 or 0 if the range is at the start of the file.
275
276 Next, the range of lines in file2 shall be written in the following format
277 if the range contains two or more lines:
278 "--- %d,%d ----\n", <beginning line number>, <ending line number>
279 and the following format otherwise:
280 "--- %d ----\n", <ending line number>
281 '''
282 fmt = difflib._format_range_context
283 self.assertEqual(fmt(3,3), '3')
284 self.assertEqual(fmt(3,4), '4')
285 self.assertEqual(fmt(3,5), '4,5')
286 self.assertEqual(fmt(3,6), '4,6')
287 self.assertEqual(fmt(0,0), '0')
288
289
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000290def test_main():
291 difflib.HtmlDiff._default_prefix = 0
292 Doctests = doctest.DocTestSuite(difflib)
Terry Reedy99f96372010-11-25 06:12:34 +0000293 run_unittest(
294 TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
295 TestOutputFormat, Doctests)
Raymond Hettinger43d790c2003-07-16 04:34:56 +0000296
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000297if __name__ == '__main__':
298 test_main()