Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 1 | """Compare files.""" |
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 2 | |
Guido van Rossum | 54f22ed | 2000-02-04 15:10:34 +0000 | [diff] [blame] | 3 | import os, stat, statcache |
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 4 | |
# Number of bytes read from each file per chunk when file contents have
# to be compared.
BUFSIZE = 8 * 1024

# Results of past content comparisons: maps a (f1, f2) pair of file names
# to a (signature of f1, signature of f2, outcome) tuple.
_cache = {}
| 7 | |
def cmp(f1, f2, shallow=1, use_statcache=0):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- If true, files whose stat signatures (file type, size
               and modification time) are identical are taken to be
               equal without reading them.  If the signatures differ
               but the sizes match, the contents are still compared.
               Defaults to 1.

    use_statcache -- Do not stat() each file directly: go through
                     the statcache module for more efficiency.

    Return value:

    integer -- 1 if the files are the same, 0 otherwise.  0 is also
               returned when either name is not a regular file.

    Exceptions raised while stat()ing, opening or reading the files
    are not caught here and propagate to the caller.

    This function uses a cache for past comparisons and the results,
    with a cache invalidation mechanism relying on stale signatures.
    Of course, if 'use_statcache' is true, this mechanism is defeated,
    and the cache will never grow stale.  The cache is emptied once it
    holds more than 100 entries so that it cannot grow without bound.

    """
    if use_statcache:
        stat_function = statcache.stat
    else:
        stat_function = os.stat
    s1 = _sig(stat_function(f1))
    s2 = _sig(stat_function(f2))
    # Only regular files are ever reported as equal.
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return 0
    # Shallow mode: identical (type, size, mtime) is accepted as equality.
    if shallow and s1 == s2:
        return 1
    # Files of different sizes cannot have the same contents.
    if s1[1] != s2[1]:
        return 0

    # Reuse an earlier outcome only if both signatures are unchanged
    # since it was recorded.
    result = _cache.get((f1, f2))
    if result and (s1, s2) == result[:2]:
        return result[2]
    outcome = _do_cmp(f1, f2)
    # Limit the size of the cache: without this, every distinct pair of
    # names ever compared would stay in memory for the process lifetime.
    if len(_cache) > 100:
        _cache.clear()
    _cache[f1, f2] = s1, s2, outcome
    return outcome
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 52 | |
def _sig(st):
    """Return the comparison signature of a stat result.

    Arguments:

    st -- A stat result, indexable with the stat.ST_* constants

    Return value:

    tuple -- (file type bits, size, modification time)

    """
    file_type = stat.S_IFMT(st[stat.ST_MODE])
    size = st[stat.ST_SIZE]
    mtime = st[stat.ST_MTIME]
    return (file_type, size, mtime)
Guido van Rossum | 2d72687 | 1999-10-26 14:02:01 +0000 | [diff] [blame] | 57 | |
def _do_cmp(f1, f2):
    """Compare the contents of two files chunk by chunk.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    Return value:

    integer -- 1 if both files have identical contents, 0 otherwise.

    Both files are opened in binary mode and read BUFSIZE bytes at a
    time.  They are always closed before this function returns or
    raises, so no file handles are leaked.

    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while 1:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return 0
                # Equal and empty chunks: both files ended together.
                if not b1:
                    return 1
        finally:
            fp2.close()
    finally:
        # Runs even when opening f2 fails, so fp1 is never left open.
        fp1.close()