blob: b08be53dd97c89c564650286b7ee9bce5814e1e8 [file] [log] [blame]
Barry Warsaw04f357c2002-07-23 19:04:11 +00001import difflib
Benjamin Petersonee8712c2008-05-20 21:35:26 +00002from test.support import run_unittest, findfile
Neal Norwitze7dfe212003-07-01 14:59:46 +00003import unittest
Raymond Hettinger43d790c2003-07-16 04:34:56 +00004import doctest
Gustavo Niemeyer548148812006-01-31 18:34:13 +00005import sys
Neal Norwitze7dfe212003-07-01 14:59:46 +00006
Neal Norwitze7dfe212003-07-01 14:59:46 +00007
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher results for short, plain-ASCII strings.

    Every input here is far shorter than the 200 items at which the
    automatic popularity heuristic starts to apply, so ``bpopular`` is
    expected to stay empty.
    """

    def test_one_insert(self):
        """A single extra character in b is reported as one 'insert'."""
        # Extra character at the very front of b.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        # get_opcodes() already returns a list, so it is compared directly.
        self.assertEqual(sm.get_opcodes(),
            [('insert', 0, 0, 0, 1),
             ('equal', 0, 100, 1, 101)])
        self.assertEqual(sm.bpopular, set())

        # Extra character in the middle of b.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(sm.get_opcodes(),
            [('equal', 0, 50, 0, 50),
             ('insert', 50, 50, 50, 51),
             ('equal', 50, 100, 51, 101)])
        self.assertEqual(sm.bpopular, set())

    def test_one_delete(self):
        """A character present only in a is reported as one 'delete'."""
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(sm.get_opcodes(),
            [('equal', 0, 40, 0, 40),
             ('delete', 40, 41, 40, 40),
             ('equal', 41, 81, 40, 80)])

    def test_bjunk(self):
        """``bjunk`` contains exactly the junk elements that occur in b."""
        first = 'a' * 40 + 'b' * 40
        second = 'a' * 44 + 'b' * 40

        def is_blank(ch):
            return ch == ' '

        def is_blank_or_b(ch):
            return ch in (' ', 'b')

        # b holds no blank at all, so nothing can be flagged as junk.
        sm = difflib.SequenceMatcher(isjunk=is_blank, a=first, b=second)
        self.assertEqual(sm.bjunk, set())

        # Trailing blanks in b are picked up by the predicate.
        sm = difflib.SequenceMatcher(isjunk=is_blank,
                a=first, b=second + ' ' * 20)
        self.assertEqual(sm.bjunk, {' '})

        # A wider predicate flags both the blanks and the 'b' characters.
        sm = difflib.SequenceMatcher(isjunk=is_blank_or_b,
                a=first, b=second + ' ' * 20)
        self.assertEqual(sm.bjunk, {' ', 'b'})
44
Terry Reedy99f96372010-11-25 06:12:34 +000045
class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        """Compare matcher results with the popularity heuristic on and off."""
        # autojunk defaults to True, and the heuristic applies to
        # sequences of 200 or more items.
        uniform = 'b' * 200
        prefixed = 'a' + uniform

        with_heuristic = difflib.SequenceMatcher(None, uniform, prefixed)
        self.assertAlmostEqual(with_heuristic.ratio(), 0, places=3)
        self.assertEqual(with_heuristic.bpopular, {'b'})

        # Same inputs again, this time with the heuristic switched off.
        without_heuristic = difflib.SequenceMatcher(
            None, uniform, prefixed, autojunk=False)
        self.assertAlmostEqual(without_heuristic.ratio(), 0.9975, places=3)
        self.assertEqual(without_heuristic.bpopular, set())
Terry Reedy99f96372010-11-25 06:12:34 +000062
63
class TestSFbugs(unittest.TestCase):
    """Regression tests for tracker bugs 763023, #979794 and #1488943."""

    def test_ratio_for_null_seqn(self):
        """All three ratio flavours report 1 for two empty sequences."""
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        """Diffing two empty lists yields generators that are exhausted at once."""
        # Check fix for bug #979794
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_added_tab_hint(self):
        """Guide ('? ') lines are emitted when a leading tab is added."""
        # Check fix for bug #1488943
        old_line = "\tI am a buggy"
        new_line = "\t\tI am a bug"
        diff = list(difflib.Differ().compare([old_line], [new_line]))
        self.assertEqual(4, len(diff))

        self.assertEqual("- \tI am a buggy", diff[0])

        # The '--' markers have to sit underneath the removed "gy".  What
        # fills the columns in front of them is alignment whitespace only;
        # whether that is all spaces or keeps the line's own tab is left to
        # difflib, so just its width and blankness are pinned here.
        removed_at = old_line.index("gy")
        guide = diff[1]
        padding = guide[2:2 + removed_at]
        self.assertEqual("? ", guide[:2])
        self.assertEqual(removed_at, len(padding))
        self.assertTrue(padding.isspace(), repr(guide))
        self.assertEqual("--\n", guide[2 + removed_at:])

        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])
86
# Sample "from"/"to" texts for SF patch 914575.  TestSFpatches.test_html_diff
# repeats them (with filler lines) to build its f1a/t1a/f1b/t1b inputs.
# NOTE(review): the HTML produced from these strings is compared with a
# stored baseline file, so every blank counts.  Runs of blanks inside the
# literals may not have survived intact in this copy -- verify them against
# test_difflib_expect.html before relying on them.
patch914575_from1 = """
 1. Beautiful is beTTer than ugly.
 2. Explicit is better than implicit.
 3. Simple is better than complex.
 4. Complex is better than complicated.
"""

patch914575_to1 = """
 1. Beautiful is better than ugly.
 3. Simple is better than complex.
 4. Complicated is better than complex.
 5. Flat is better than nested.
"""
100
# Tab-handling samples for SF patch 914575 (the tabsize tables in
# TestSFpatches.test_html_diff).  Each line documents its own whitespace in
# the bracketed legend: "t" is a tab and "s" a space, given first for the
# "from" text and then for the "to" text.  The whitespace written below
# follows that legend.
# NOTE(review): "to" line 5 is labelled [ss] (two trailing spaces) but
# carries no trailing blanks here; it is left as found -- confirm against
# test_difflib_expect.html before changing it.
patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
  \t\tLine 2: preceeded by from:[sstt] to:[sssst]
  \t \tLine 3: preceeded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
    Line 1: preceeded by from:[tt] to:[ssss]
    \tLine 2: preceeded by from:[sstt] to:[sssst]
      Line 3: preceeded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""
116
# Line-wrapping samples for SF patch 914575.  TestSFpatches.test_html_diff
# feeds them to an HtmlDiff created with wrapcolumn=14, both via
# splitlines() and via splitlines(True).
# NOTE(review): the generated HTML is compared with a stored baseline file,
# so every blank counts.  Runs of blanks inside the literals may not have
# survived intact in this copy -- verify them against
# test_difflib_expect.html before relying on them.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4 changed
line 5 changed
line 6 changed
line 7
line 8 subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2 added
line 3
line 4 chanGEd
line 5a chanGed
line 6a changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
150
151class TestSFpatches(unittest.TestCase):
152
153 def test_html_diff(self):
154 # Check SF patch 914575 for generating HTML differences
155 f1a = ((patch914575_from1 + '123\n'*10)*3)
156 t1a = (patch914575_to1 + '123\n'*10)*3
157 f1b = '456\n'*10 + f1a
158 t1b = '456\n'*10 + t1a
159 f1a = f1a.splitlines()
160 t1a = t1a.splitlines()
161 f1b = f1b.splitlines()
162 t1b = t1b.splitlines()
163 f2 = patch914575_from2.splitlines()
164 t2 = patch914575_to2.splitlines()
165 f3 = patch914575_from3
166 t3 = patch914575_to3
167 i = difflib.HtmlDiff()
168 j = difflib.HtmlDiff(tabsize=2)
169 k = difflib.HtmlDiff(wrapcolumn=14)
Tim Peters48bd7f32004-08-29 22:38:38 +0000170
Martin v. Löwise064b412004-08-29 16:34:40 +0000171 full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
172 tables = '\n'.join(
173 [
Tim Peters48bd7f32004-08-29 22:38:38 +0000174 '<h2>Context (first diff within numlines=5(default))</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000175 i.make_table(f1a,t1a,'from','to',context=True),
Tim Peters48bd7f32004-08-29 22:38:38 +0000176 '<h2>Context (first diff after numlines=5(default))</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000177 i.make_table(f1b,t1b,'from','to',context=True),
Tim Peters48bd7f32004-08-29 22:38:38 +0000178 '<h2>Context (numlines=6)</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000179 i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
Tim Peters48bd7f32004-08-29 22:38:38 +0000180 '<h2>Context (numlines=0)</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000181 i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
Tim Peters48bd7f32004-08-29 22:38:38 +0000182 '<h2>Same Context</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000183 i.make_table(f1a,f1a,'from','to',context=True),
Tim Peters48bd7f32004-08-29 22:38:38 +0000184 '<h2>Same Full</h2>',
Martin v. Löwise064b412004-08-29 16:34:40 +0000185 i.make_table(f1a,f1a,'from','to',context=False),
186 '<h2>Empty Context</h2>',
187 i.make_table([],[],'from','to',context=True),
188 '<h2>Empty Full</h2>',
189 i.make_table([],[],'from','to',context=False),
190 '<h2>tabsize=2</h2>',
191 j.make_table(f2,t2),
192 '<h2>tabsize=default</h2>',
193 i.make_table(f2,t2),
194 '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
195 k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
196 '<h2>wrapcolumn=14,splitlines()</h2>',
197 k.make_table(f3.splitlines(),t3.splitlines()),
198 '<h2>wrapcolumn=14,splitlines(True)</h2>',
199 k.make_table(f3.splitlines(True),t3.splitlines(True)),
200 ])
201 actual = full.replace('</body>','\n%s\n</body>' % tables)
Tim Peters48bd7f32004-08-29 22:38:38 +0000202
Philip Jenveya27c5bd2009-05-28 06:09:08 +0000203 # temporarily uncomment next two lines to baseline this test
204 #with open('test_difflib_expect.html','w') as fp:
205 # fp.write(actual)
Tim Peters48bd7f32004-08-29 22:38:38 +0000206
Philip Jenveya27c5bd2009-05-28 06:09:08 +0000207 with open(findfile('test_difflib_expect.html')) as fp:
208 self.assertEqual(actual, fp.read())
Martin v. Löwise064b412004-08-29 16:34:40 +0000209
Gustavo Niemeyer548148812006-01-31 18:34:13 +0000210 def test_recursion_limit(self):
211 # Check if the problem described in patch #1413711 exists.
212 limit = sys.getrecursionlimit()
213 old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)]
214 new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
215 difflib.SequenceMatcher(None, old, new).get_opcodes()
216
217
class TestOutputFormat(unittest.TestCase):
    """Header lines and range fields produced by the diff generators."""

    def test_tab_delimiter(self):
        """File name and file date in a header are separated by a tab."""
        args = ['one', 'two', 'Original', 'Current',
            '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        """Without file dates the header is the bare name, no trailing tab."""
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format(self):
        """Range fields follow the 'start' / 'start,count' rules quoted below."""
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number>  if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines> otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty
        #   range starts the file.

        # The private helper is spelled _format_range_unified in later
        # difflib releases; use that when present, else the original name.
        fmt = getattr(difflib, '_format_range_unified', None)
        if fmt is None:
            fmt = difflib._format_range
        self.assertEqual(fmt(3,3), '3,0')
        self.assertEqual(fmt(3,4), '4')
        self.assertEqual(fmt(3,5), '4,2')
        self.assertEqual(fmt(3,6), '4,3')
        self.assertEqual(fmt(0,0), '0,0')
R. David Murrayb2416e52010-04-12 16:58:02 +0000255
def test_main():
    """Run this module's test cases together with difflib's own doctests."""
    # Start HtmlDiff's class-level prefix counter from zero -- presumably so
    # the generated HTML is the same on every run; confirm against difflib.
    difflib.HtmlDiff._default_prefix = 0
    suites = (
        TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
        TestOutputFormat, doctest.DocTestSuite(difflib),
    )
    run_unittest(*suites)
Raymond Hettinger43d790c2003-07-16 04:34:56 +0000262
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()