#! /usr/bin/env python

# Module ndiff version 1.3.0
# Released to the public domain 26-Mar-1999,
# by Tim Peters (tim_one@email.msn.com).

# Provided as-is; use at your own risk; no warranty; no promises; enjoy!

"""ndiff [-q] file1 file2

Print a human-friendly file difference report to stdout.  Both inter-
and intra-line differences are noted.

If -q ("quiet") is not specified, the first two lines of output are

-: file1
+: file2

Each remaining line begins with a two-letter code:

    "- "    line unique to file1
    "+ "    line unique to file2
    "  "    line common to both files
    "? "    line not present in either input file

Lines beginning with "? " attempt to guide the eye to intraline
differences, and were not present in either input file.

The first file can be recovered by retaining only lines that begin with
"  " or "- ", and deleting those 2-character prefixes.

The second file can be recovered similarly, but by retaining only "  "
and "+ " lines.  On Unix, the second file can be recovered by piping the
output through
    sed -n '/^[+ ] /s/^..//p'
Modifications to recover the first file are left as an exercise for
the reader.

See module comments for details and programmatic interface.
"""

__version__ = 1, 3, 0

# SequenceMatcher tries to compute a "human-friendly diff" between
# two sequences (chiefly picturing a file as a sequence of lines,
# and a line as a sequence of characters, here).  Unlike e.g. UNIX(tm)
# diff, the fundamental notion is the longest *contiguous* & junk-free
# matching subsequence.  That's what catches people's eyes.  The
# Windows(tm) windiff has another interesting notion, pairing up elements
# that appear uniquely in each sequence.  That, and the method here,
# appear to yield more intuitive difference reports than does diff.  This
# method appears to be the least vulnerable to synching up on blocks
# of "junk lines", though (like blank lines in ordinary text files,
# or maybe "<P>" lines in HTML files).  That may be because this is
# the only method of the 3 that has a *concept* of "junk" <wink>.
#
# Note that ndiff makes no claim to produce a *minimal* diff.  To the
# contrary, minimal diffs are often counter-intuitive, because they
# synch up anywhere possible, sometimes on accidental matches 100
# pages apart.  Restricting synch points to contiguous matches
# preserves some notion of locality, at the occasional cost of
# producing a longer diff.
#
# With respect to junk, an earlier version of ndiff simply refused to
# *start* a match with a junk element.  The result was cases like this:
#     before: private Thread currentThread;
#     after:  private volatile Thread currentThread;
# If you consider whitespace to be junk, the longest contiguous match
# not starting with junk is "e Thread currentThread".  So ndiff reported
# that "e volatil" was inserted between the 't' and the 'e' in "private".
# While an accurate view, to people that's absurd.  The current version
# looks for matching blocks that are entirely junk-free, then extends the
# longest one of those as far as possible but only with matching junk.
# So now "currentThread" is matched, then extended to suck up the
# preceding blank; then "private" is matched, and extended to suck up the
# following blank; then "Thread" is matched; and finally ndiff reports
# that "volatile " was inserted before "Thread".  The only quibble
# remaining is that perhaps it was really the case that " volatile"
# was inserted after "private".  I can live with that <wink>.
#
# NOTE on junk:  the module-level names
#    IS_LINE_JUNK
#    IS_CHARACTER_JUNK
# can be set to any functions you like.  The first one should accept
# a single string argument, and return true iff the string is junk.
# The default is whether the regexp r"\s*#?\s*$" matches (i.e., a
# line without visible characters, except for at most one splat).
# The second should accept a single-character string, and return true
# iff that character is junk.  The default is whether the character is
# a blank or tab (note:  it's a bad idea to include newline in this!).
#
# After setting those, you can call fcompare(f1name, f2name) with the
# names of the files you want to compare.  The difference report
# is sent to stdout.  Or you can call main(args), passing what would
# have been in sys.argv[1:] had the cmd-line form been used.
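#
# For instance (a minimal sketch; the file names and the replacement
# junk predicate are made up for illustration):
#
#     import string, ndiff
#     def blank_only(line):
#         # treat only truly blank lines as junk, not "#" lines
#         return string.strip(line) == ""
#     ndiff.IS_LINE_JUNK = blank_only
#     ndiff.fcompare("old_spam.py", "new_spam.py")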

import string
TRACE = 0

# define what "junk" means
import re

def IS_LINE_JUNK(line, pat=re.compile(r"\s*#?\s*$").match):
    return pat(line) is not None

def IS_CHARACTER_JUNK(ch, ws=" \t"):
    return ch in ws
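
# For example, with the defaults above:
#     IS_LINE_JUNK("\n")        returns true  (no visible characters)
#     IS_LINE_JUNK("  #  \n")   returns true  (at most one splat)
#     IS_LINE_JUNK("x = 1\n")   returns false
#     IS_CHARACTER_JUNK(" ")    returns true
#     IS_CHARACTER_JUNK("x")    returns false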

del re

class SequenceMatcher:
    def __init__(self, isjunk=None, a='', b=''):
        # Members:
        # a
        #      first sequence
        # b
        #      second sequence; differences are computed as "what do
        #      we need to do to 'a' to change it into 'b'?"
        # b2j
        #      for x in b, b2j[x] is a list of the indices (into b)
        #      at which x appears; junk elements do not appear
        # b2jhas
        #      b2j.has_key
        # fullbcount
        #      for x in b, fullbcount[x] == the number of times x
        #      appears in b; only materialized if really needed (used
        #      only for computing quick_ratio())
        # matching_blocks
        #      a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
        #      ascending & non-overlapping in i and in j; terminated by
        #      a dummy (len(a), len(b), 0) sentinel
        # opcodes
        #      a list of (tag, i1, i2, j1, j2) tuples, where tag is
        #      one of
        #          'replace'  a[i1:i2] should be replaced by b[j1:j2]
        #          'delete'   a[i1:i2] should be deleted
        #          'insert'   b[j1:j2] should be inserted
        #          'equal'    a[i1:i2] == b[j1:j2]
        # isjunk
        #      a user-supplied function taking a sequence element and
        #      returning true iff the element is "junk" -- this has
        #      subtle but helpful effects on the algorithm, which I'll
        #      get around to writing up someday <0.9 wink>.
        #      DON'T USE!  Only __chain_b uses this.  Use isbjunk.
        # isbjunk
        #      for x in b, isbjunk(x) == isjunk(x) but much faster;
        #      it's really the has_key method of a hidden dict.
        #      DOES NOT WORK for x in a!

        self.isjunk = isjunk
        self.a = self.b = None
        self.set_seqs(a, b)

    def set_seqs(self, a, b):
        self.set_seq1(a)
        self.set_seq2(b)

    def set_seq1(self, a):
        if a is self.a:
            return
        self.a = a
        self.matching_blocks = self.opcodes = None

    def set_seq2(self, b):
        if b is self.b:
            return
        self.b = b
        self.matching_blocks = self.opcodes = None
        self.fullbcount = None
        self.__chain_b()

    # For each element x in b, set b2j[x] to a list of the indices in
    # b where x appears; the indices are in increasing order; note that
    # the number of times x appears in b is len(b2j[x]) ...
    # when self.isjunk is defined, junk elements don't show up in this
    # map at all, which stops the central find_longest_match method
    # from starting any matching block at a junk element ...
    # also creates the fast isbjunk function ...
    # note that this is only called when b changes; so for cross-product
    # kinds of matches, it's best to call set_seq2 once, then set_seq1
    # repeatedly

    def __chain_b(self):
        # Because isjunk is a user-defined (not C) function, and we test
        # for junk a LOT, it's important to minimize the number of calls.
        # Before the tricks described here, __chain_b was by far the most
        # time-consuming routine in the whole module!  If anyone sees
        # Jim Roskind, thank him again for profile.py -- I never would
        # have guessed that.
        # The first trick is to build b2j ignoring the possibility
        # of junk.  I.e., we don't call isjunk at all yet.  Throwing
        # out the junk later is much cheaper than building b2j "right"
        # from the start.
        b = self.b
        self.b2j = b2j = {}
        self.b2jhas = b2jhas = b2j.has_key
        for i in xrange(len(b)):
            elt = b[i]
            if b2jhas(elt):
                b2j[elt].append(i)
            else:
                b2j[elt] = [i]

        # Now b2j.keys() contains elements uniquely, and especially when
        # the sequence is a string, that's usually a good deal smaller
        # than len(string).  The difference is the number of isjunk calls
        # saved.
        isjunk, junkdict = self.isjunk, {}
        if isjunk:
            for elt in b2j.keys():
                if isjunk(elt):
                    junkdict[elt] = 1   # value irrelevant; it's a set
                    del b2j[elt]

        # Now for x in b, isjunk(x) == junkdict.has_key(x), but the
        # latter is much faster.  Note too that while there may be a
        # lot of junk in the sequence, the number of *unique* junk
        # elements is probably small.  So the memory burden of keeping
        # this dict alive is likely trivial compared to the size of b2j.
        self.isbjunk = junkdict.has_key
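
        # For example, for b = "abcab" with no junk function this
        # leaves b2j == {'a': [0, 3], 'b': [1, 4], 'c': [2]}, and
        # isbjunk is always false.  With IS_CHARACTER_JUNK and
        # b = "a b", the blank becomes a junkdict key and is removed
        # from b2j entirely.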

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'
        In other words, of all maximal matching blocks, return one
        that starts earliest in a, and of all those maximal matching
        blocks that start earliest in a, return the one that starts
        earliest in b.

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that
        no junk element appears in the block.  Then that block is
        extended as far as possible by matching (only) junk elements on
        both sides.  So the resulting block never matches on junk except
        as identical junk happens to be adjacent to an "interesting"
        match.

        If no blocks match, return (alo, blo, 0).
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        nothing = []
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}
            for j in b2j.get(a[i], nothing):
                # a[i] matches b[j]
                if j < blo:
                    continue
                if j >= bhi:
                    break
                k = newj2len[j] = j2lenget(j-1, 0) + 1
                if k > bestsize:
                    besti, bestj, bestsize = i-k+1, j-k+1, k
            j2len = newj2len

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        if TRACE:
            print "find_longest_match", alo, ahi, blo, bhi
            print "    returns", besti, bestj, bestsize
        return besti, bestj, bestsize
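
    # Example:  with no junk function, for a = " abcd" and
    # b = "abcd abcd", find_longest_match(0, 5, 0, 9) returns
    # (0, 4, 5), because the leading blank may participate:
    # a[0:5] == b[4:9] == " abcd".  If blanks are junk, the longest
    # junk-free match is a[1:5] == b[0:4] == "abcd", no junk
    # extension applies, and it returns (1, 0, 4) instead.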

    def get_matching_blocks(self):
        if self.matching_blocks is not None:
            return self.matching_blocks
        self.matching_blocks = []
        la, lb = len(self.a), len(self.b)
        self.__helper(0, la, 0, lb, self.matching_blocks)
        self.matching_blocks.append( (la, lb, 0) )
        if TRACE:
            print '*** matching blocks', self.matching_blocks
        return self.matching_blocks
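
    # Example:  for a = "qabxcd" and b = "abycdf" (no junk), this
    # returns [(1, 0, 2), (4, 3, 2), (6, 6, 0)]:  "ab" and "cd"
    # match, and the last triple is the zero-length sentinel.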

    # builds list of matching blocks covering a[alo:ahi] and
    # b[blo:bhi], appending them in increasing order to answer

    def __helper(self, alo, ahi, blo, bhi, answer):
        i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
        # a[alo:i] vs b[blo:j] unknown
        # a[i:i+k] same as b[j:j+k]
        # a[i+k:ahi] vs b[j+k:bhi] unknown
        if k:
            if alo < i and blo < j:
                self.__helper(alo, i, blo, j, answer)
            answer.append( x )
            if i+k < ahi and j+k < bhi:
                self.__helper(i+k, ahi, j+k, bhi, answer)

    def ratio(self):
        """Return a measure of the sequences' similarity (float in [0,1]).

        Where T is the total number of elements in both sequences, and
        M is the number of matches, this is 2*M / T.
        Note that this is 1 if the sequences are identical, and 0 if
        they have nothing in common.
        """

        matches = reduce(lambda sum, triple: sum + triple[-1],
                         self.get_matching_blocks(), 0)
        return 2.0 * matches / (len(self.a) + len(self.b))
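
    # Example:  for a = "abcd" and b = "bcde" the single matching
    # block is "bcd", so M = 3, T = 8, and ratio() is
    # 2.0 * 3 / 8 == 0.75.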

    def quick_ratio(self):
        """Return an upper bound on ratio() relatively quickly."""
        # viewing a and b as multisets, set matches to the cardinality
        # of their intersection; this counts the number of matches
        # without regard to order, so is clearly an upper bound
        if self.fullbcount is None:
            self.fullbcount = fullbcount = {}
            for elt in self.b:
                fullbcount[elt] = fullbcount.get(elt, 0) + 1
        fullbcount = self.fullbcount
        # avail[x] is the number of times x appears in 'b' less the
        # number of times we've seen it in 'a' so far ... kinda
        avail = {}
        availhas, matches = avail.has_key, 0
        for elt in self.a:
            if availhas(elt):
                numb = avail[elt]
            else:
                numb = fullbcount.get(elt, 0)
            avail[elt] = numb - 1
            if numb > 0:
                matches = matches + 1
        return 2.0 * matches / (len(self.a) + len(self.b))

    def real_quick_ratio(self):
        """Return an upper bound on ratio() very quickly."""
        la, lb = len(self.a), len(self.b)
        # can't have more matches than the number of elements in the
        # shorter sequence
        return 2.0 * min(la, lb) / (la + lb)
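
    # Example:  for a = "abcd" and b = "bcde", quick_ratio() counts
    # the multiset intersection {'b', 'c', 'd'} (3 matches), giving
    # 0.75, and real_quick_ratio() gives 2.0 * min(4, 4) / 8 == 1.0;
    # both bound ratio() == 0.75 from above.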

    def get_opcodes(self):
        if self.opcodes is not None:
            return self.opcodes
        i = j = 0
        self.opcodes = answer = []
        for ai, bj, size in self.get_matching_blocks():
            # invariant:  we've pumped out correct diffs to change
            # a[:i] into b[:j], and the next matching block is
            # a[ai:ai+size] == b[bj:bj+size].  So we need to pump
            # out a diff to change a[i:ai] into b[j:bj], pump out
            # the matching block, and move (i,j) beyond the match
            tag = ''
            if i < ai and j < bj:
                tag = 'replace'
            elif i < ai:
                tag = 'delete'
            elif j < bj:
                tag = 'insert'
            if tag:
                answer.append( (tag, i, ai, j, bj) )
            i, j = ai+size, bj+size
            # the list of matching blocks is terminated by a
            # sentinel with size 0
            if size:
                answer.append( ('equal', ai, i, bj, j) )
        return answer
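
    # Example:  for a = "qabxcd" and b = "abycdf" this returns
    #     [('delete',  0, 1, 0, 0),    # kill the leading "q"
    #      ('equal',   1, 3, 0, 2),    # "ab" matches
    #      ('replace', 3, 4, 2, 3),    # "x" -> "y"
    #      ('equal',   4, 6, 3, 5),    # "cd" matches
    #      ('insert',  6, 6, 5, 6)]    # append the trailing "f"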

# meant for dumping lines
def dump(tag, x, lo, hi):
    for i in xrange(lo, hi):
        print tag, x[i],

# figure out which mark to stick under characters in lines that
# have changed (blank = same, - = deleted, + = inserted, ^ = replaced)
_combine = { '  ': ' ',
             '. ': '-',
             ' .': '+',
             '..': '^' }
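
# For example (spacing illustrative), changing
#     private Thread currentThread;
# into
#     private volatile Thread currentThread;
# makes fancy_replace print a triple of report lines like
#     - private Thread currentThread;
#     + private volatile Thread currentThread;
#     ?         +++++++++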

def plain_replace(a, alo, ahi, b, blo, bhi):
    assert alo < ahi and blo < bhi
    # dump the shorter block first -- reduces the burden on short-term
    # memory if the blocks are of very different sizes
    if bhi - blo < ahi - alo:
        dump('+', b, blo, bhi)
        dump('-', a, alo, ahi)
    else:
        dump('-', a, alo, ahi)
        dump('+', b, blo, bhi)

# When replacing one block of lines with another, this guy searches
# the blocks for *similar* lines; the best-matching pair (if any) is
# used as a synch point, and intraline difference marking is done on
# the similar pair.  Lots of work, but often worth it.

def fancy_replace(a, alo, ahi, b, blo, bhi):
    if TRACE:
        print '*** fancy_replace', alo, ahi, blo, bhi
        dump('>', a, alo, ahi)
        dump('<', b, blo, bhi)

    # don't synch up unless the lines have a similarity score of at
    # least cutoff; best_ratio tracks the best score seen so far
    best_ratio, cutoff = 0.74, 0.75
    cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
    eqi, eqj = None, None   # 1st indices of equal lines (if any)

    # search for the pair that matches best without being identical
    # (identical lines must be junk lines, & we don't want to synch up
    # on junk -- unless we have to)
    for j in xrange(blo, bhi):
        bj = b[j]
        cruncher.set_seq2(bj)
        for i in xrange(alo, ahi):
            ai = a[i]
            if ai == bj:
                if eqi is None:
                    eqi, eqj = i, j
                continue
            cruncher.set_seq1(ai)
            # computing similarity is expensive, so use the quick
            # upper bounds first -- have seen this speed up messy
            # compares by a factor of 3.
            # note that ratio() is only expensive to compute the first
            # time it's called on a sequence pair; the expensive part
            # of the computation is cached by cruncher
            if cruncher.real_quick_ratio() > best_ratio and \
               cruncher.quick_ratio() > best_ratio and \
               cruncher.ratio() > best_ratio:
                best_ratio, best_i, best_j = cruncher.ratio(), i, j
    if best_ratio < cutoff:
        # no non-identical "pretty close" pair
        if eqi is None:
            # no identical pair either -- treat it as a straight replace
            plain_replace(a, alo, ahi, b, blo, bhi)
            return
        # no close pair, but an identical pair -- synch up on that
        best_i, best_j, best_ratio = eqi, eqj, 1.0
    else:
        # there's a close pair, so forget the identical pair (if any)
        eqi = None

    # a[best_i] very similar to b[best_j]; eqi is None iff they're not
    # identical
    if TRACE:
        print '*** best_ratio', best_ratio, best_i, best_j
        dump('>', a, best_i, best_i+1)
        dump('<', b, best_j, best_j+1)

    # pump out diffs from before the synch point
    fancy_helper(a, alo, best_i, b, blo, best_j)

    # do intraline marking on the synch pair
    aelt, belt = a[best_i], b[best_j]
    if eqi is None:
        # pump out a '-', '+', '?' triple for the synched lines;
        atags = btags = ""
        cruncher.set_seqs(aelt, belt)
        for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
            la, lb = ai2 - ai1, bj2 - bj1
            if tag == 'replace':
                atags = atags + '.' * la
                btags = btags + '.' * lb
            elif tag == 'delete':
                atags = atags + '.' * la
            elif tag == 'insert':
                btags = btags + '.' * lb
            elif tag == 'equal':
                atags = atags + ' ' * la
                btags = btags + ' ' * lb
            else:
                raise ValueError, 'unknown tag ' + `tag`
        la, lb = len(atags), len(btags)
        if la < lb:
            atags = atags + ' ' * (lb - la)
        elif lb < la:
            btags = btags + ' ' * (la - lb)
        combined = map(lambda x, y: _combine[x+y], atags, btags)
        print '-', aelt, '+', belt, '?', \
              string.rstrip(string.join(combined, ''))
    else:
        # the synch pair is identical
        print ' ', aelt,

    # pump out diffs from after the synch point
    fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)

def fancy_helper(a, alo, ahi, b, blo, bhi):
    if alo < ahi:
        if blo < bhi:
            fancy_replace(a, alo, ahi, b, blo, bhi)
        else:
            dump('-', a, alo, ahi)
    elif blo < bhi:
        dump('+', b, blo, bhi)

# open a file & return the file object; gripe and return 0 if it
# couldn't be opened
def fopen(fname):
    try:
        return open(fname, 'r')
    except IOError, detail:
        print "couldn't open " + fname + ": " + str(detail)
        return 0

# open two files & spray the diff to stdout; return false iff a problem
def fcompare(f1name, f2name):
    f1 = fopen(f1name)
    f2 = fopen(f2name)
    if not f1 or not f2:
        return 0

    a = f1.readlines(); f1.close()
    b = f2.readlines(); f2.close()

    cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if tag == 'replace':
            fancy_replace(a, alo, ahi, b, blo, bhi)
        elif tag == 'delete':
            dump('-', a, alo, ahi)
        elif tag == 'insert':
            dump('+', b, blo, bhi)
        elif tag == 'equal':
            dump(' ', a, alo, ahi)
        else:
            raise ValueError, 'unknown tag ' + `tag`

    return 1

# crack args (sys.argv[1:] is normal) & compare;
# return false iff a problem

def main(args):
    import getopt
    try:
        opts, args = getopt.getopt(args, "q")
    except getopt.error, detail:
        print str(detail)
        print __doc__
        return 0
    noisy = 1
    for opt, val in opts:
        if opt == "-q":
            noisy = 0
    if len(args) != 2:
        print 'need 2 args'
        print __doc__
        return 0
    f1name, f2name = args
    if noisy:
        print '-:', f1name
        print '+:', f2name
    return fcompare(f1name, f2name)

if __name__ == '__main__':
    import sys
    args = sys.argv[1:]
    if 1:
        main(args)
    else:
        import profile, pstats
        statf = "ndiff.pro"
        profile.run("main(args)", statf)
        stats = pstats.Stats(statf)
        stats.strip_dirs().sort_stats('time').print_stats()