Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 1 | """Compare files.""" |
| 2 | |
| 3 | import os, stat, statcache |
| 4 | |
# Memo of past content comparisons, filled in by cmp():
# maps (f1, f2) -> (signature of f1, signature of f2, outcome).
_cache = {}

# Number of bytes _do_cmp() reads from each file per step (8 KiB).
BUFSIZE = 8 * 1024
| 7 | |
def cmp(f1, f2, shallow=1, use_statcache=0):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- If true, two files whose stat signatures (file type,
               size and modification time) are identical are taken
               to be equal without reading them.  Defaults to 1.

    use_statcache -- If true, do not stat() each file directly: go
                     through the statcache module for more efficiency.
                     Any true value is accepted.  Defaults to 0.

    Return value:

    integer -- 1 if the files are the same, 0 otherwise.  If either
               name does not refer to a regular file the result is 0.

    Files of different sizes are reported as different without being
    read.  Otherwise the contents are compared and the outcome is
    remembered in a cache keyed on (f1, f2) together with both stat
    signatures; a cached outcome is reused only while both signatures
    are unchanged, which is how stale entries are invalidated.
    Of course, if 'use_statcache' is true, this mechanism is defeated,
    and the cache will never grow stale.

    """
    # Choose by truth value rather than by indexing a 2-tuple with the
    # flag: indexing raised IndexError for true values such as 2 and
    # TypeError for non-integer true values, and it looked up
    # statcache.stat even when the flag was false.
    if use_statcache:
        stat_function = statcache.stat
    else:
        stat_function = os.stat
    s1, s2 = _sig(stat_function(f1)), _sig(stat_function(f2))
    # Only regular files can compare equal.
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return 0
    if shallow and s1 == s2:
        return 1
    # Different sizes: the contents cannot match, no need to read.
    if s1[1] != s2[1]:
        return 0

    result = _cache.get((f1, f2))
    if result and (s1, s2) == result[:2]:
        # Both signatures match the cached ones: reuse the outcome.
        return result[2]
    outcome = _do_cmp(f1, f2)
    _cache[f1, f2] = s1, s2, outcome
    return outcome
| 45 | |
def _sig(st):
    """Return the comparison signature of a stat() result.

    Arguments:

    st -- A stat result, indexable with the stat.ST_* constants

    Return value:

    tuple -- (file type bits of the mode, size, modification time)

    """
    file_type = stat.S_IFMT(st[stat.ST_MODE])
    size = st[stat.ST_SIZE]
    mtime = st[stat.ST_MTIME]
    return (file_type, size, mtime)
| 50 | |
def _do_cmp(f1, f2):
    """Compare the contents of two files, BUFSIZE bytes at a time.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    Return value:

    integer -- 1 if both files hold the same bytes, 0 otherwise.

    Both files are opened in binary mode and are always closed before
    returning, including when opening the second file or a read raises.

    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while 1:
                b1, b2 = fp1.read(bufsize), fp2.read(bufsize)
                if b1 != b2:
                    return 0
                if not b1:
                    # b1 == b2 and both are empty: the end of both
                    # files was reached without finding a difference.
                    return 1
        finally:
            fp2.close()
    finally:
        fp1.close()