# Source blob: 325449aa557cc13d61625781d210d663e958cf4f (text captured from a web blame view; navigation links removed)
import difflib
import doctest
import sys
import unittest

from test.support import run_unittest, findfile


class TestWithAscii(unittest.TestCase):
    """SequenceMatcher opcodes, ratio and junk sets on short ASCII strings.

    Every sequence used here is at most 104 characters long, and the tests
    expect ``bpopular`` to stay empty for them.
    """

    def test_one_insert(self):
        # A single extra character at the very start of b: one insert
        # followed by one long equal run.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('insert', 0, 0, 0, 1),
                          ('equal', 0, 100, 1, 101)])
        self.assertEqual(sm.bpopular, set())

        # The same single insert, this time in the middle of b.
        sm = difflib.SequenceMatcher(None, 'b' * 100,
                                     'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 50, 0, 50),
                          ('insert', 50, 50, 50, 51),
                          ('equal', 50, 100, 51, 101)])
        self.assertEqual(sm.bpopular, set())

    def test_one_delete(self):
        # a has one character ('c') that b lacks, so the only non-equal
        # opcode is a one-element delete.
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40,
                                     'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 40, 0, 40),
                          ('delete', 40, 41, 40, 40),
                          ('equal', 41, 81, 40, 80)])

    def test_bjunk(self):
        # bjunk holds only the elements of b for which isjunk() is true.

        # The junk character does not occur in b at all.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40)
        self.assertEqual(sm.bjunk, set())

        # The junk character occurs in b.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' '})

        # Two different junk characters, both present in b.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' ', 'b'})


class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        # By default autojunk=True and the heuristic kicks in for a sequence
        # of length 200+
        seq1 = 'b' * 200
        seq2 = 'a' + 'b' * 200

        # With the heuristic on, 'b' is reported as popular and the two
        # sequences no longer match at all.
        sm = difflib.SequenceMatcher(None, seq1, seq2)
        self.assertAlmostEqual(sm.ratio(), 0, places=3)
        self.assertEqual(sm.bpopular, {'b'})

        # Now turn the heuristic off
        sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
        self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
        self.assertEqual(sm.bpopular, set())


class TestSFbugs(unittest.TestCase):
    """Regression tests for bugs reported against difflib."""

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        # Both generators must simply be exhausted for empty input.
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        diff = list(difflib.Differ().compare(["\tI am a buggy"],
                                             ["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])
        # The "?" guide line marks the deleted "gy" with "--".  The padding
        # between "?" and "--" is pure whitespace; check the markers and
        # that "--" lines up with the end of the "-" line rather than
        # comparing against one exact run of blanks.
        # NOTE(review): the exact padding (all spaces vs. a kept tab) is
        # believed to differ between difflib versions -- confirm before
        # pinning a literal here.
        hint = diff[1]
        self.assertEqual(["?", "--"], hint.split())
        self.assertTrue(hint.endswith("--\n"))
        self.assertEqual(len(diff[0]), len(hint.rstrip("\n")))
        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])

# Input fixtures for TestSFpatches.test_html_diff (SF patch 914575).
# NOTE(review): the HTML produced from these strings is compared against a
# stored expected file, so their whitespace is significant.  Runs of blanks
# may have been collapsed when this text was captured -- verify against
# test_difflib_expect.html before relying on the exact spacing.

# Pair 1: short numbered text with changed, removed and added lines.
patch914575_from1 = """
 1. Beautiful is beTTer than ugly.
 2. Explicit is better than implicit.
 3. Simple is better than complex.
 4. Complex is better than complicated.
"""

patch914575_to1 = """
 1. Beautiful is better than ugly.
 3. Simple is better than complex.
 4. Complicated is better than complex.
 5. Flat is better than nested.
"""

# Pair 2: the "from" side contains tab characters, used by the tabsize tables.
patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
 \t\tLine 2: preceeded by from:[sstt] to:[sssst]
 \t \tLine 3: preceeded by from:[sstst] to:[ssssss]
Line 4: \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
 Line 1: preceeded by from:[tt] to:[ssss]
 \tLine 2: preceeded by from:[sstt] to:[sssst]
 Line 3: preceeded by from:[sstst] to:[ssssss]
Line 4: has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

# Pair 3: long and short lines, used by the wrapcolumn=14 tables.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4 changed
line 5 changed
line 6 changed
line 7
line 8 subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2 added
line 3
line 4 chanGEd
line 5a chanGed
line 6a changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""

class TestSFpatches(unittest.TestCase):
    """Regression tests for patches contributed to difflib."""

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        f1a = (patch914575_from1 + '123\n' * 10) * 3
        t1a = (patch914575_to1 + '123\n' * 10) * 3
        # The "b" variants push the first difference down by ten lines.
        f1b = '456\n' * 10 + f1a
        t1b = '456\n' * 10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        # One complete HTML page, plus a series of headed tables that are
        # spliced in just before its closing </body> tag.
        full = i.make_file(f1a, t1a, 'from', 'to', context=False, numlines=5)
        tables = '\n'.join(
            [
             '<h2>Context (first diff within numlines=5(default))</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True),
             '<h2>Context (first diff after numlines=5(default))</h2>',
             i.make_table(f1b, t1b, 'from', 'to', context=True),
             '<h2>Context (numlines=6)</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=6),
             '<h2>Context (numlines=0)</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=0),
             '<h2>Same Context</h2>',
             i.make_table(f1a, f1a, 'from', 'to', context=True),
             '<h2>Same Full</h2>',
             i.make_table(f1a, f1a, 'from', 'to', context=False),
             '<h2>Empty Context</h2>',
             i.make_table([], [], 'from', 'to', context=True),
             '<h2>Empty Full</h2>',
             i.make_table([], [], 'from', 'to', context=False),
             '<h2>tabsize=2</h2>',
             j.make_table(f2, t2),
             '<h2>tabsize=default</h2>',
             i.make_table(f2, t2),
             '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             k.make_table(f3.splitlines(), t3.splitlines(),
                          context=True, numlines=0),
             '<h2>wrapcolumn=14,splitlines()</h2>',
             k.make_table(f3.splitlines(), t3.splitlines()),
             '<h2>wrapcolumn=14,splitlines(True)</h2>',
             k.make_table(f3.splitlines(True), t3.splitlines(True)),
             ])
        actual = full.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w') as fp:
        #    fp.write(actual)

        with open(findfile('test_difflib_expect.html')) as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        # The inputs are twice as long as the interpreter's recursion
        # limit; the call only has to complete without raising.
        limit = sys.getrecursionlimit()
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit * 2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit * 2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()


class TestOutputFormat(unittest.TestCase):
    """Checks on the header and range text of unified and context diffs."""

    def test_tab_delimiter(self):
        # When file dates are supplied, each header line is the file name
        # and the date separated by a tab.
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        # Without file dates the header lines end right after the name.
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number>  if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines>  otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty
        #   range starts the file.
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3, 3), '3,0')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,2')
        self.assertEqual(fmt(3, 6), '4,3')
        self.assertEqual(fmt(0, 0), '0,0')

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #     "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #     "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3, 3), '3')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,5')
        self.assertEqual(fmt(3, 6), '4,6')
        self.assertEqual(fmt(0, 0), '0')


def test_main():
    # Run every test class in this file together with difflib's doctests.
    # Reset HtmlDiff's class-level prefix first -- presumably so that the
    # HTML generated by test_html_diff is reproducible and can be compared
    # with the stored expected file (TODO confirm against difflib).
    difflib.HtmlDiff._default_prefix = 0
    Doctests = doctest.DocTestSuite(difflib)
    run_unittest(
        TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
        TestOutputFormat, Doctests)

# Allow running this test file directly as a script.
if __name__ == '__main__':
    test_main()