blob: 11540f85bc28a6d7b8856f6b23bb821123b35a96 [file] [log] [blame]
"""Efficiently compare files, boolean outcome only (equal / not equal).

Tricks (used in this order):
    - Use the statcache module to avoid statting files more than once
    - Files with identical type, size & mtime are assumed to be clones
    - Files with different type or size cannot be identical
    - We keep a cache of outcomes of earlier comparisons
    - We don't fork a process to run 'cmp' but read the files ourselves
"""
Guido van Rossumc6360141990-10-13 19:23:40 +000010
Guido van Rossum25d7caf1992-03-31 19:04:48 +000011import os
Guido van Rossum276123d1990-10-24 16:39:37 +000012from stat import *
Guido van Rossumc6360141990-10-13 19:23:40 +000013import statcache
14
15
# The cache.
#
# Module-level dictionary shared by every call to cmp().  Each key
# identifies a pair of file names; each value is a tuple
# (sig1, sig2, outcome) holding the two file signatures that were current
# when the files were really compared and the result of that comparison.
# cmp() only reuses an entry while both stored signatures still match the
# files' current signatures; otherwise the entry is overwritten.
cache = {}
19
20
def cmp(f1, f2, shallow=1):
    """Compare two files, using the cache of earlier outcomes if possible.

    Arguments:
    f1, f2  -- names of the two files to compare
    shallow -- if true (the default), two files whose signatures (type,
               size and mtime) are identical are reported as equal
               without looking at their contents

    Return 1 for identical files, 0 for different.  If either name does
    not refer to a regular file the result is always 0.

    May raise os.error (or another exception) if either file could not
    be statted, opened or read.
    """
    s1, s2 = sig(statcache.stat(f1)), sig(statcache.stat(f2))
    if not S_ISREG(s1[0]) or not S_ISREG(s2[0]):
        # Either is not a plain file -- always report as different
        return 0
    if shallow and s1 == s2:
        # type, size & mtime match -- report same
        return 1
    if s1[:2] != s2[:2]:
        # types or sizes differ -- report different without reading
        return 0
    # Same type and size -- look in the cache.  The key is the pair of
    # names as a tuple: joining them with a space would make
    # ("a b", "c") and ("a", "b c") share one cache entry.
    key = (f1, f2)
    entry = cache.get(key)
    if entry is not None:
        cs1, cs2, outcome = entry
        if s1 == cs1 and s2 == cs2:
            # cached signatures match -- the earlier outcome still holds
            return outcome
        # stale cached signature(s) -- fall through and compare again
    # really compare
    outcome = do_cmp(f1, f2)
    cache[key] = s1, s2, outcome
    return outcome
Guido van Rossumc6360141990-10-13 19:23:40 +000049
def sig(st):
    """Build the comparison signature for one stat result.

    st is a stat sequence indexed with the ST_* constants of the stat
    module.  The result is the tuple (file type bits, size, mtime).
    """
    file_type = S_IFMT(st[ST_MODE])
    size = st[ST_SIZE]
    mtime = st[ST_MTIME]
    return file_type, size, mtime
Guido van Rossumc6360141990-10-13 19:23:40 +000053
def do_cmp(f1, f2):
    """Compare the contents of two files, really.

    Both files are opened in binary mode and read in step, bufsize
    bytes at a time.  Return 1 if the contents are identical, 0 as soon
    as a difference is found.

    Exceptions raised while opening or reading either file propagate to
    the caller; any file that was opened is closed first.
    """
    bufsize = 8*1024  # Could be tuned
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while 1:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return 0
                if not b1:
                    # b1 == b2 and both are empty: the two files reached
                    # end-of-file together without any difference
                    return 1
        finally:
            fp2.close()
    finally:
        # Also reached when opening f2 failed, so fp1 never leaks
        fp1.close()