# Blame: Jack Jansen | 33a5d7e | 1999-02-03 12:07:14 +0000
"""findgremlins - Search through a folder and subfolders for
text files that have characters with bit 8 set, and print
the filename and a bit of context.

By Just, with a little glue by Jack"""
| 6 | |
| 7 | import macfs |
| 8 | import re |
| 9 | import os |
| 10 | import string |
| 11 | import sys |
| 12 | |
| 13 | xpat = re.compile(r"[\200-\377]") |
| 14 | |
def walk(top, recurse=1):
    """Report "gremlins" (characters with bit 8 set) in text files under top.

    If top is a folder, each entry in it is walked in turn when recurse
    is true (nested entries are always walked with the default recurse
    value); when recurse is false the folder is skipped entirely.

    Otherwise top is treated as a file.  It is examined only when its
    Macintosh file type is 'TEXT' and its name does not end in ".hqx".
    If more than 16 of its first 256 characters are NUL or have bit 8
    set, it is reported as a binary file and skipped.  For any other
    file, the repr of the path is printed once, followed by one line of
    surrounding context for every gremlin found.
    """
    if os.path.isdir(top):
        if recurse:
            for name in os.listdir(top):
                walk(os.path.join(top, name))
        return

    _creator, filetype = macfs.FSSpec(top).GetCreatorType()
    if filetype != 'TEXT' or top[-4:] == ".hqx":
        return

    # Close the file explicitly instead of relying on garbage collection.
    fp = open(top)
    try:
        data = fp.read()
    finally:
        fp.close()

    # Heuristic: too many NUL/8-bit characters up front means "not text".
    badcount = 0
    for ch in data[:256]:
        if ord(ch) == 0 or ord(ch) >= 0x80:
            badcount = badcount + 1
    if badcount > 16:
        print("%r appears to be a binary file" % (top,))
        return

    gotone = 0
    for m in xpat.finditer(data):
        if not gotone:
            # Print the file name once, before its first gremlin.
            print(repr(top))
            gotone = 1
        i, j = m.span()
        # Clamp the start of the context window: a negative start index
        # would wrap around to the end of data and show the wrong (often
        # empty) context for gremlins in the first 15 characters.
        start = max(0, i - 15)
        print("  " + data[start:j + 15].replace('\n', ' '))
| 44 | |
def main():
    """Ask for a folder via macfs.GetDirectory() and walk it.

    Nothing is scanned when the returned ok flag is false.
    """
    fss, ok = macfs.GetDirectory()
    if not ok:
        return
    walk(fss.as_pathname())
| 49 | |
if __name__ == '__main__':
    # Run the scan only when executed as a script.
    main()
    # Exit with a non-zero status on purpose: so we see the output.
    sys.exit(1)
| 53 | |