blob: 24ae4b3b115b01fd64dccfd2da527b5794a11e78 [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Compare files."""
Guido van Rossum2d726871999-10-26 14:02:01 +00002
Guido van Rossum54f22ed2000-02-04 15:10:34 +00003import os, stat, statcache
Guido van Rossum2d726871999-10-26 14:02:01 +00004
# Memo of past deep comparisons, keyed by the (f1, f2) file-name pair.
# Each value is a (signature1, signature2, outcome) tuple as stored by cmp().
_cache = {}

# Number of bytes read from each file per chunk during a deep comparison.
BUFSIZE = 8 * 1024
7
def cmp(f1, f2, shallow=1, use_statcache=0):
    """Compare two files given by name.

    Arguments:

    f1 -- name of the first file

    f2 -- name of the second file

    shallow -- when true (the default, 1), two files whose stat
               signatures (type, size, mtime) are equal are reported
               as the same without reading their contents.

    use_statcache -- when true, obtain stat results through
                     statcache.stat instead of calling os.stat
                     directly.

    Return value:

    integer -- 1 if the files are considered the same, 0 otherwise.
               Anything that is not a regular file compares as 0.

    Results of content comparisons are remembered in a module-level
    cache keyed by the (f1, f2) pair.  A remembered result is reused
    only while both stored signatures still match the current ones.
    Note that with 'use_statcache' true the signatures come from
    statcache rather than a fresh os.stat, so this staleness check
    may not notice that a file has changed.

    """
    # Default to a direct stat; switch to the statcache module on request.
    stat_function = os.stat
    if use_statcache:
        stat_function = statcache.stat

    sig1 = _sig(stat_function(f1))
    sig2 = _sig(stat_function(f2))

    # Only regular files can compare equal.
    regular = stat.S_IFREG
    if not (sig1[0] == regular and sig2[0] == regular):
        return 0
    # Shallow mode trusts matching signatures outright.
    if shallow and sig1 == sig2:
        return 1
    # Different sizes can never have the same contents.
    if sig1[1] != sig2[1]:
        return 0

    key = (f1, f2)
    cached = _cache.get(key)
    # Reuse the remembered outcome only if neither signature has changed.
    if cached and cached[:2] == (sig1, sig2):
        return cached[2]
    outcome = _do_cmp(f1, f2)
    _cache[key] = (sig1, sig2, outcome)
    return outcome
Guido van Rossum2d726871999-10-26 14:02:01 +000052
def _sig(st):
    """Reduce a stat result to the (file type, size, mtime) signature.

    'st' is indexed with the stat.ST_* constants; the file type is the
    format portion of the mode as extracted by stat.S_IFMT.
    """
    file_type = stat.S_IFMT(st[stat.ST_MODE])
    size = st[stat.ST_SIZE]
    mtime = st[stat.ST_MTIME]
    return (file_type, size, mtime)
Guido van Rossum2d726871999-10-26 14:02:01 +000057
def _do_cmp(f1, f2):
    """Compare the contents of two files chunk by chunk.

    Both files are opened in binary mode and read BUFSIZE bytes at a
    time.  Returns 0 at the first pair of chunks that differ, and 1
    once both files are exhausted without a difference.

    Both files are closed on every exit path, including when opening
    the second file or a read raises an exception.
    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while 1:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return 0
                # Equal and empty means both files hit EOF together.
                if not b1:
                    return 1
        finally:
            fp2.close()
    finally:
        fp1.close()