#! /usr/bin/env python

# Released to the public domain $JustDate: 3/16/98 $,
# by Tim Peters (email tim_one@email.msn.com).

# ndiff file1 file2 -- a human-friendly file differencer.

# $Revision$
# $NoKeywords: $

# SequenceMatcher tries to compute a "human-friendly diff" between
# two sequences (chiefly picturing a file as a sequence of lines,
# and a line as a sequence of characters, here).  Unlike UNIX(tm) diff,
# e.g., the fundamental notion is the longest *contiguous* & junk-free
# matching subsequence.  That's what catches people's eyes.  The
# Windows(tm) windiff has another interesting notion, pairing up elements
# that appear uniquely in each sequence.  That, and the method here,
# appear to yield more intuitive difference reports than does diff.  This
# method appears to be the least vulnerable to synching up on blocks
# of "junk lines", though (like blank lines in ordinary text files,
# or maybe "<P>" lines in HTML files).  That may be because this is
# the only method of the 3 that has a *concept* of "junk" <wink>.
#
# Note that ndiff makes no claim to produce a *minimal* diff.  To the
# contrary, minimal diffs are often counter-intuitive, because they
# synch up anywhere possible, sometimes on accidental matches 100 pages
# apart.  Restricting synch points to contiguous matches preserves some
# notion of locality, at the occasional cost of producing a longer diff.
#
# With respect to junk, an earlier version of ndiff simply refused to
# *start* a match with a junk element.  The result was cases like this:
#     before: private Thread currentThread;
#     after:  private volatile Thread currentThread;
# If you consider whitespace to be junk, the longest contiguous match
# not starting with junk is "e Thread currentThread".  So ndiff reported
# that "e volatil" was inserted between the 't' and the 'e' in "private".
# While an accurate view, to people that's absurd.  The current version
# looks for matching blocks that are entirely junk-free, then extends the
# longest one of those as far as possible but only with matching junk.
# So now "currentThread" is matched, then extended to suck up the
# preceding blank; then "private" is matched, and extended to suck up the
# following blank; then "Thread" is matched; and finally ndiff reports
# that "volatile " was inserted before "Thread".  The only quibble
# remaining is that perhaps it was really the case that " volatile"
# was inserted after "private".  I can live with that <wink>.
#
# NOTE on the output: From an ndiff report,
# 1) The first file can be recovered by retaining only lines that begin
#    with "  " or "- ", and deleting those 2-character prefixes.
# 2) The second file can be recovered similarly, but by retaining only
#    "  " and "+ " lines.
# 3) Lines beginning with "? " attempt to guide the eye to intraline
#    differences, and were not present in either input file.
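#
#    For example (illustrative, not from the original), from the report
#        - abcde
#        + abXde
#        ?   ^
#          fghij
#    the first file is "abcde\nfghij\n" and the second "abXde\nfghij\n".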
#
# NOTE on junk: the module-level names
#    IS_LINE_JUNK
#    IS_CHARACTER_JUNK
# can be set to any functions you like.  The first one should accept
# a single string argument, and return true iff the string is junk.
# The default is whether the regexp r"\s*#?\s*$" matches (i.e., a
# line without visible characters, except for at most one splat).
# The second should accept a single character (a string of length 1),
# and return true iff the character is junk.  The default is whether
# the character is a blank or tab (note: bad idea to include newline
# in this!).
#
# After setting those, you can call fcompare(f1name, f2name) with the
# names of the files you want to compare.  The difference report
# is sent to stdout.  Or you can call main(), which expects to find
# (exactly) the two file names in sys.argv.
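#
# Example (illustrative; assumes this script is importable as "ndiff"):
#     import ndiff
#     ndiff.fcompare("old.txt", "new.txt")   # report is printed to stdout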

import string
TRACE = 0

# define what "junk" means
import re

def IS_LINE_JUNK(line, pat=re.compile(r"\s*#?\s*$").match):
    return pat(line) is not None

def IS_CHARACTER_JUNK(ch, ws=" \t"):
    return ch in ws

del re
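
# Example (illustrative) of the defaults:
#     IS_LINE_JUNK("  #  \n")  -> true  (only blanks and at most one splat)
#     IS_LINE_JUNK("code\n")   -> false
#     IS_CHARACTER_JUNK(" ")   -> true
#     IS_CHARACTER_JUNK("\n")  -> false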

class SequenceMatcher:
    def __init__(self, isjunk=None, a='', b=''):
        # Members:
        # a
        #      first sequence
        # b
        #      second sequence; differences are computed as "what do
        #      we need to do to 'a' to change it into 'b'?"
        # b2j
        #      for x in b, b2j[x] is a list of the indices (into b)
        #      at which x appears; junk elements do not appear
        # b2jhas
        #      b2j.has_key
        # fullbcount
        #      for x in b, fullbcount[x] == the number of times x
        #      appears in b; only materialized if really needed (used
        #      only for computing quick_ratio())
        # matching_blocks
        #      a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
        #      ascending & non-overlapping in i and in j; terminated by
        #      a dummy (len(a), len(b), 0) sentinel
        # opcodes
        #      a list of (tag, i1, i2, j1, j2) tuples, where tag is
        #      one of
        #          'replace'  a[i1:i2] should be replaced by b[j1:j2]
        #          'delete'   a[i1:i2] should be deleted
        #          'insert'   b[j1:j2] should be inserted
        #          'equal'    a[i1:i2] == b[j1:j2]
        # isjunk
        #      a user-supplied function taking a sequence element and
        #      returning true iff the element is "junk" -- this has
        #      subtle but helpful effects on the algorithm, which I'll
        #      get around to writing up someday <0.9 wink>.
        #      DON'T USE!  Only __chain_b uses this.  Use isbjunk.
        # isbjunk
        #      for x in b, isbjunk(x) == isjunk(x) but much faster;
        #      it's really the has_key method of a hidden dict.
        #      DOES NOT WORK for x in a!

        self.isjunk = isjunk
        self.a = self.b = None
        self.set_seqs(a, b)

    def set_seqs(self, a, b):
        self.set_seq1(a)
        self.set_seq2(b)

    def set_seq1(self, a):
        if a is self.a:
            return
        self.a = a
        self.matching_blocks = self.opcodes = None

    def set_seq2(self, b):
        if b is self.b:
            return
        self.b = b
        self.matching_blocks = self.opcodes = None
        self.fullbcount = None
        self.__chain_b()

    # for each element x in b, set b2j[x] to a list of the indices in
    # b where x appears; the indices are in increasing order; note that
    # the number of times x appears in b is len(b2j[x]) ...
    # when self.isjunk is defined, junk elements don't show up in this
    # map at all, which stops the central find_longest_match method
    # from starting any matching block at a junk element ...
    # also creates the fast isbjunk function ...
    # note that this is only called when b changes; so for cross-product
    # kinds of matches, it's best to call set_seq2 once, then set_seq1
    # repeatedly
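    #
    # Example (illustrative): after set_seq2("abcb") with no junk,
    #     self.b2j == {'a': [0], 'b': [1, 3], 'c': [2]}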

    def __chain_b(self):
        # Because isjunk is a user-defined (not C) function, and we test
        # for junk a LOT, it's important to minimize the number of calls.
        # Before the tricks described here, __chain_b was by far the most
        # time-consuming routine in the whole module!  If anyone sees
        # Jim Roskind, thank him again for profile.py -- I never would
        # have guessed that.
        # The first trick is to build b2j ignoring the possibility
        # of junk.  I.e., we don't call isjunk at all yet.  Throwing
        # out the junk later is much cheaper than building b2j "right"
        # from the start.
        b = self.b
        self.b2j = b2j = {}
        self.b2jhas = b2jhas = b2j.has_key
        for i in xrange(0, len(b)):
            elt = b[i]
            if b2jhas(elt):
                b2j[elt].append(i)
            else:
                b2j[elt] = [i]

        # Now b2j.keys() contains elements uniquely, and especially when
        # the sequence is a string, that's usually a good deal smaller
        # than len(string).  The difference is the number of isjunk calls
        # saved.
        isjunk, junkdict = self.isjunk, {}
        if isjunk:
            for elt in b2j.keys():
                if isjunk(elt):
                    junkdict[elt] = 1   # value irrelevant; it's a set
                    del b2j[elt]

        # Now for x in b, isjunk(x) == junkdict.has_key(x), but the
        # latter is much faster.  Note too that while there may be a
        # lot of junk in the sequence, the number of *unique* junk
        # elements is probably small.  So the memory burden of keeping
        # this dict alive is likely trivial compared to the size of b2j.
        self.isbjunk = junkdict.has_key
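
        # Example (illustrative): with isjunk = IS_CHARACTER_JUNK and
        # b = "a b", the blank is filtered out as junk, so
        #     b2j == {'a': [0], 'b': [2]}  and  isbjunk(" ") returns true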

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'
        In other words, of all maximal matching blocks, returns one
        that starts earliest in a, and of all those maximal matching
        blocks that start earliest in a, returns the one that starts
        earliest in b.

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that
        no junk element appears in the block.  Then that block is
        extended as far as possible by matching (only) junk elements on
        both sides.  So the resulting block never matches on junk except
        as identical junk happens to be adjacent to an "interesting"
        match.

        If no blocks match, returns (alo, blo, 0).
        """

        # CAUTION: stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        # find longest junk-free match
        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        for i in xrange(alo, ahi):
            # check for longest match starting at a[i]
            if i + bestsize >= ahi:
                # we're too far right to get a new best
                break
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            for j in b2j.get(a[i], []):
                # a[i] matches b[j]
                if j < blo:
                    continue
                if j + bestsize >= bhi:
                    # we're too far right to get a new best, here or
                    # anywhere to the right
                    break
                if a[i + bestsize] != b[j + bestsize]:
                    # can't be a longer match; this test is not necessary
                    # for correctness, but is a huge win for efficiency
                    continue
                # set k to length of match
                k = 1   # a[i] == b[j] already known
                while i + k < ahi and j + k < bhi and \
                      a[i+k] == b[j+k] and not isbjunk(b[j+k]):
                    k = k + 1
                if k > bestsize:
                    besti, bestj, bestsize = i, j, k
                    if i + bestsize >= ahi:
                        # only time in my life I really wanted a
                        # labelled break <wink> -- we're done with
                        # both loops now
                        break

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        if TRACE:
            print "find_longest_match", alo, ahi, blo, bhi
            print "    returns", besti, bestj, bestsize
        return besti, bestj, bestsize

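# Example (illustrative) of the effect of junk on find_longest_match:
#     s = SequenceMatcher(None, " abcd", "abcd abcd")
#     s.find_longest_match(0, 5, 0, 9)    -> (0, 4, 5)
# but when blanks are junk, a match can't *start* on one:
#     s = SequenceMatcher(IS_CHARACTER_JUNK, " abcd", "abcd abcd")
#     s.find_longest_match(0, 5, 0, 9)    -> (1, 0, 4)
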
# A different implementation, using a binary doubling technique that
# does far fewer element compares (trades 'em for integer compares),
# and has n*lg n worst-case behavior.  Alas, the code is much harder
# to follow (the details are tricky!), and in most cases I've seen,
# it takes at least 50% longer than the "clever dumb" method above,
# probably due to creating layers of small dicts.
# NOTE: this no longer matches the version above wrt junk; it remains
# too unpromising to update; someday, though ...

#    def find_longest_match(self, alo, ahi, blo, bhi):
#        """Find longest matching block in a[alo:ahi] and b[blo:bhi].
#
#        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
#            alo <= i <= i+k <= ahi
#            blo <= j <= j+k <= bhi
#        and for all (i',j',k') meeting those conditions,
#            k >= k'
#            i <= i'
#            and if i == i', j <= j'
#        In other words, of all maximal matching blocks, returns one
#        that starts earliest in a, and of all those maximal matching
#        blocks that start earliest in a, returns the one that starts
#        earliest in b.
#
#        If no blocks match, returns (alo, blo, 0).
#        """
#
#        a, b2j = self.a, self.b2j
#        # alljs[size][i] is a set of all j's s.t. a[i:i+len] matches
#        # b[j:j+len]
#        alljs = {}
#        alljs[1] = js = {}
#        ahits = {}
#        for i in xrange(alo, ahi):
#            elt = a[i]
#            if ahits.has_key(elt):
#                js[i] = ahits[elt]
#                continue
#            if b2j.has_key(elt):
#                in_range = {}
#                for j in b2j[elt]:
#                    if j >= blo:
#                        if j >= bhi:
#                            break
#                        in_range[j] = 1
#                if in_range:
#                    ahits[elt] = js[i] = in_range
#        del ahits
#        size = 1
#        while js:
#            oldsize = size
#            size = size + size
#            oldjs = js
#            alljs[size] = js = {}
#            for i in oldjs.keys():
#                # i has matches of size oldsize
#                if not oldjs.has_key(i + oldsize):
#                    # can't double it
#                    continue
#                second_js = oldjs[i + oldsize]
#                answer = {}
#                for j in oldjs[i].keys():
#                    if second_js.has_key(j + oldsize):
#                        answer[j] = 1
#                if answer:
#                    js[i] = answer
#        del alljs[size]
#        size = size >> 1    # max power of 2 with a match
#        if not size:
#            return alo, blo, 0
#        besti, bestj, bestsize = alo, blo, 0
#        fatis = alljs[size].keys()
#        fatis.sort()
#        for i in fatis:
#            # figure out longest match starting at a[i]
#            totalsize = halfsize = size
#            # i has matches of len totalsize at the indices in js
#            js = alljs[size][i].keys()
#            while halfsize > 1:
#                halfsize = halfsize >> 1
#                # is there a match of len halfsize starting at
#                # i + totalsize?
#                newjs = []
#                if alljs[halfsize].has_key(i + totalsize):
#                    second_js = alljs[halfsize][i + totalsize]
#                    for j in js:
#                        if second_js.has_key(j + totalsize):
#                            newjs.append(j)
#                if newjs:
#                    totalsize = totalsize + halfsize
#                    js = newjs
#            if totalsize > bestsize:
#                besti, bestj, bestsize = i, min(js), totalsize
#        return besti, bestj, bestsize

    def get_matching_blocks(self):
        if self.matching_blocks is not None:
            return self.matching_blocks
        self.matching_blocks = []
        la, lb = len(self.a), len(self.b)
        self.__helper(0, la, 0, lb, self.matching_blocks)
        self.matching_blocks.append( (la, lb, 0) )
        if TRACE:
            print '*** matching blocks', self.matching_blocks
        return self.matching_blocks
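
    # Example (illustrative):
    #     SequenceMatcher(None, "abxcd", "abcd").get_matching_blocks()
    #     -> [(0, 0, 2), (3, 2, 2), (5, 4, 0)]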

    # builds list of matching blocks covering a[alo:ahi] and
    # b[blo:bhi], appending them in increasing order to answer

    def __helper(self, alo, ahi, blo, bhi, answer):
        i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
        # a[alo:i] vs b[blo:j] unknown
        # a[i:i+k] same as b[j:j+k]
        # a[i+k:ahi] vs b[j+k:bhi] unknown
        if k:
            if alo < i and blo < j:
                self.__helper(alo, i, blo, j, answer)
            answer.append( x )
            if i+k < ahi and j+k < bhi:
                self.__helper(i+k, ahi, j+k, bhi, answer)

    def ratio(self):
        """Return a measure of the sequences' similarity (float in [0,1]).

        Where T is the total number of elements in both sequences, and
        M is the number of matches, this is 2*M / T.
        Note that this is 1 if the sequences are identical, and 0 if
        they have nothing in common.
        """

        matches = reduce(lambda sum, triple: sum + triple[-1],
                         self.get_matching_blocks(), 0)
        return 2.0 * matches / (len(self.a) + len(self.b))

    def quick_ratio(self):
        """Return an upper bound on ratio() relatively quickly."""
        # viewing a and b as multisets, set matches to the cardinality
        # of their intersection; this counts the number of matches
        # without regard to order, so is clearly an upper bound
        if self.fullbcount is None:
            self.fullbcount = fullbcount = {}
            for elt in self.b:
                fullbcount[elt] = fullbcount.get(elt, 0) + 1
        fullbcount = self.fullbcount
        # avail[x] is the number of times x appears in 'b' less the
        # number of times we've seen it in 'a' so far ... kinda
        avail = {}
        availhas, matches = avail.has_key, 0
        for elt in self.a:
            if availhas(elt):
                numb = avail[elt]
            else:
                numb = fullbcount.get(elt, 0)
            avail[elt] = numb - 1
            if numb > 0:
                matches = matches + 1
        return 2.0 * matches / (len(self.a) + len(self.b))

    def real_quick_ratio(self):
        """Return an upper bound on ratio() very quickly."""
        la, lb = len(self.a), len(self.b)
        # can't have more matches than the number of elements in the
        # shorter sequence
        return 2.0 * min(la, lb) / (la + lb)
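
    # Example (illustrative) of the bound relationships: for
    # a = "abcd" and b = "bcde",
    #     ratio() == quick_ratio() == 0.75, real_quick_ratio() == 1.0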

    def get_opcodes(self):
        if self.opcodes is not None:
            return self.opcodes
        i = j = 0
        self.opcodes = answer = []
        for ai, bj, size in self.get_matching_blocks():
            # invariant: we've pumped out correct diffs to change
            # a[:i] into b[:j], and the next matching block is
            # a[ai:ai+size] == b[bj:bj+size].  So we need to pump
            # out a diff to change a[i:ai] into b[j:bj], pump out
            # the matching block, and move (i,j) beyond the match
            tag = ''
            if i < ai and j < bj:
                tag = 'replace'
            elif i < ai:
                tag = 'delete'
            elif j < bj:
                tag = 'insert'
            if tag:
                answer.append( (tag, i, ai, j, bj) )
            i, j = ai+size, bj+size
            # the list of matching blocks is terminated by a
            # sentinel with size 0
            if size:
                answer.append( ('equal', ai, i, bj, j) )
        return answer
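
    # Example (illustrative):
    #     SequenceMatcher(None, "qabxcd", "abycdf").get_opcodes() ->
    #     [('delete',  0, 1, 0, 0),
    #      ('equal',   1, 3, 0, 2),
    #      ('replace', 3, 4, 2, 3),
    #      ('equal',   4, 6, 3, 5),
    #      ('insert',  6, 6, 5, 6)]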

# meant for dumping lines
def dump(tag, x, lo, hi):
    for i in xrange(lo, hi):
        print tag, x[i],

# figure out which mark to stick under characters in lines that
# have changed (blank = same, - = deleted, + = inserted, ^ = replaced)
_combine = { '  ': ' ',
             '. ': '-',
             ' .': '+',
             '..': '^' }
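
# Example (illustrative): each key is (atag + btag) for one column, e.g.
#     _combine['. '] == '-'   (character deleted from the first line)
#     _combine['..'] == '^'   (character replaced)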

def plain_replace(a, alo, ahi, b, blo, bhi):
    assert alo < ahi and blo < bhi
    # dump the shorter block first -- reduces the burden on short-term
    # memory if the blocks are of very different sizes
    if bhi - blo < ahi - alo:
        dump('+', b, blo, bhi)
        dump('-', a, alo, ahi)
    else:
        dump('-', a, alo, ahi)
        dump('+', b, blo, bhi)

# When replacing one block of lines with another, this guy searches
# the blocks for *similar* lines; the best-matching pair (if any) is
# used as a synch point, and intraline difference marking is done on
# the similar pair.  Lots of work, but often worth it.

def fancy_replace(a, alo, ahi, b, blo, bhi):
    if TRACE:
        print '*** fancy_replace', alo, ahi, blo, bhi
        dump('>', a, alo, ahi)
        dump('<', b, blo, bhi)

    # don't synch up unless the lines have a similarity score of at
    # least cutoff; best_ratio tracks the best score seen so far
    best_ratio, cutoff = 0.74, 0.75
    cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
    eqi, eqj = None, None   # 1st indices of equal lines (if any)

    # search for the pair that matches best without being identical
    # (identical lines must be junk lines, & we don't want to synch up
    # on junk -- unless we have to)
    for j in xrange(blo, bhi):
        bj = b[j]
        cruncher.set_seq2(bj)
        for i in xrange(alo, ahi):
            ai = a[i]
            if ai == bj:
                if eqi is None:
                    eqi, eqj = i, j
                continue
            cruncher.set_seq1(ai)
            # computing similarity is expensive, so use the quick
            # upper bounds first -- have seen this speed up messy
            # compares by a factor of 3.
            # note that ratio() is only expensive to compute the first
            # time it's called on a sequence pair; the expensive part
            # of the computation is cached by cruncher
            if cruncher.real_quick_ratio() > best_ratio and \
               cruncher.quick_ratio() > best_ratio and \
               cruncher.ratio() > best_ratio:
                best_ratio, best_i, best_j = cruncher.ratio(), i, j
    if best_ratio < cutoff:
        # no non-identical "pretty close" pair
        if eqi is None:
            # no identical pair either -- treat it as a straight replace
            plain_replace(a, alo, ahi, b, blo, bhi)
            return
        # no close pair, but an identical pair -- synch up on that
        best_i, best_j, best_ratio = eqi, eqj, 1.0
    else:
        # there's a close pair, so forget the identical pair (if any)
        eqi = None

    # a[best_i] very similar to b[best_j]; eqi is None iff they're not
    # identical
    if TRACE:
        print '*** best_ratio', best_ratio, best_i, best_j
        dump('>', a, best_i, best_i+1)
        dump('<', b, best_j, best_j+1)

    # pump out diffs from before the synch point
    fancy_helper(a, alo, best_i, b, blo, best_j)

    # do intraline marking on the synch pair
    aelt, belt = a[best_i], b[best_j]
    if eqi is None:
        # pump out a '-', '+', '?' triple for the synched lines
        atags = btags = ""
        cruncher.set_seqs(aelt, belt)
        for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
            la, lb = ai2 - ai1, bj2 - bj1
            if tag == 'replace':
                atags = atags + '.' * la
                btags = btags + '.' * lb
            elif tag == 'delete':
                atags = atags + '.' * la
            elif tag == 'insert':
                btags = btags + '.' * lb
            elif tag == 'equal':
                atags = atags + ' ' * la
                btags = btags + ' ' * lb
            else:
                raise ValueError, 'unknown tag ' + `tag`
        la, lb = len(atags), len(btags)
        if la < lb:
            atags = atags + ' ' * (lb - la)
        elif lb < la:
            btags = btags + ' ' * (la - lb)
        combined = map(lambda x,y: _combine[x+y], atags, btags)
        print '-', aelt, '+', belt, '?', \
              string.rstrip(string.join(combined, ''))
    else:
        # the synch pair is identical
        print ' ', aelt,

    # pump out diffs from after the synch point
    fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)

def fancy_helper(a, alo, ahi, b, blo, bhi):
    if alo < ahi:
        if blo < bhi:
            fancy_replace(a, alo, ahi, b, blo, bhi)
        else:
            dump('-', a, alo, ahi)
    elif blo < bhi:
        dump('+', b, blo, bhi)

# open a file & return the file object; gripe and return 0 if it
# couldn't be opened
def fopen(fname):
    try:
        return open(fname, 'r')
    except IOError, detail:
        print "couldn't open " + fname + ": " + `detail`
        return 0

# open two files & spray the diff to stdout; return false iff a problem
def fcompare(f1name, f2name):
    f1 = fopen(f1name)
    f2 = fopen(f2name)
    if not f1 or not f2:
        return 0

    a = f1.readlines(); f1.close()
    b = f2.readlines(); f2.close()

    cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if tag == 'replace':
            fancy_replace(a, alo, ahi, b, blo, bhi)
        elif tag == 'delete':
            dump('-', a, alo, ahi)
        elif tag == 'insert':
            dump('+', b, blo, bhi)
        elif tag == 'equal':
            dump(' ', a, alo, ahi)
        else:
            raise ValueError, 'unknown tag ' + `tag`

    return 1

# get file names from argv & compare; return false iff a problem
def main():
    from sys import argv
    if len(argv) != 3:
        print 'need 2 args'
        return 0
    [f1name, f2name] = argv[1:3]
    print '-:', f1name
    print '+:', f2name
    return fcompare(f1name, f2name)

if __name__ == '__main__':
    if 1:
        main()
    else:
        import profile, pstats
        statf = "ndiff.pro"
        profile.run("main()", statf)
        stats = pstats.Stats(statf)
        stats.strip_dirs().sort_stats('time').print_stats()