Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 1 | """Compare files.""" |
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 2 | |
Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 3 | import os, stat, statcache |
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 4 | |
# Memo of earlier content comparisons, keyed by the (f1, f2) name pair.
# Each value is a (signature1, signature2, outcome) tuple.
_cache = {}

# Number of bytes read from each file per chunk during a content comparison.
BUFSIZE = 8 * 1024
| 7 | |
def cmp(f1, f2, shallow=1, use_statcache=0):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- Just check stat signature (do not read the files).
               defaults to 1.

    use_statcache -- Do not stat() each file directly: go through
                     the statcache module for more efficiency.
                     Any true value enables it; defaults to 0.

    Return value:

    integer -- 1 if the files are the same, 0 otherwise.  0 is also
               returned when either name is not a regular file.

    Errors raised while stat()ing or reading the files are not caught
    here; they propagate to the caller.

    This function uses a cache for past comparisons and the results,
    with a cache invalidation mechanism relying on stale signatures.
    Of course, if 'use_statcache' is true, this mechanism is defeated,
    and the cache will never grow stale.

    """
    # Choose the stat implementation by truthiness rather than by using the
    # flag as a tuple index, so any true value works (not only 1) and
    # statcache is only looked up when it was actually requested.
    if use_statcache:
        stat_function = statcache.stat
    else:
        stat_function = os.stat

    s1 = _sig(stat_function(f1))
    s2 = _sig(stat_function(f2))

    # Anything that is not a regular file never compares equal.
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return 0
    # Shallow mode: identical (type, size, mtime) signatures are enough.
    if shallow and s1 == s2:
        return 1
    # Files of different sizes cannot have the same contents.
    if s1[1] != s2[1]:
        return 0

    # Reuse a previous byte-by-byte result only while both signatures
    # still match the ones recorded with it.
    result = _cache.get((f1, f2))
    if result and (s1, s2) == result[:2]:
        return result[2]
    outcome = _do_cmp(f1, f2)
    _cache[f1, f2] = s1, s2, outcome
    return outcome
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 45 | |
def _sig(st):
    """Return the (file type, size, mtime) signature of a stat result.

    'st' is indexed with the stat.ST_* constants; the file type is the
    mode passed through stat.S_IFMT().
    """
    file_type = stat.S_IFMT(st[stat.ST_MODE])
    size = st[stat.ST_SIZE]
    mtime = st[stat.ST_MTIME]
    return (file_type, size, mtime)
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 50 | |
def _do_cmp(f1, f2):
    """Compare the contents of two files chunk by chunk.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    Return value:

    integer -- 1 if both files hold the same bytes, 0 otherwise.

    Both files are opened in binary mode and read BUFSIZE bytes at a
    time.  They are always closed before returning, including when
    opening the second file or a read fails; such errors propagate to
    the caller.
    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while 1:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return 0
                # Equal and empty chunks: both files ended together.
                if not b1:
                    return 1
        finally:
            fp2.close()
    finally:
        fp1.close()