Jack Jansen | 33a5d7e | 1999-02-03 12:07:14 +0000 | [diff] [blame] | 1 | """findgremlins - Search through a folder and subfolders for |
| 2 | text files that have characters with bit 8 set, and print |
| 3 | the filename and a bit of context. |
| 4 | |
| 5 | By Just, with a little glue by Jack""" |
| 6 | |
Jack Jansen | b340acf | 2003-01-26 21:40:00 +0000 | [diff] [blame] | 7 | import EasyDialogs |
Jack Jansen | f3f4af5 | 2003-02-05 23:10:46 +0000 | [diff] [blame] | 8 | import MacOS |
Jack Jansen | 33a5d7e | 1999-02-03 12:07:14 +0000 | [diff] [blame] | 9 | import re |
| 10 | import os |
| 11 | import string |
| 12 | import sys |
| 13 | |
| 14 | xpat = re.compile(r"[\200-\377]") |
| 15 | |
def walk(top, recurse=1):
    """Report characters with the high bit set in text files under top.

    If top is a directory, each entry in it is visited in turn, but only
    when recurse is true; with recurse false a directory is ignored.

    Otherwise top is treated as a file.  It is examined only when the type
    code returned by MacOS.GetCreatorAndType is 'TEXT' or four NUL bytes
    and the name does not end in ".hqx".  A file whose first 256 characters
    contain more than 16 NULs or high-bit characters is reported as binary
    and skipped.  For any other file the quoted filename is printed once,
    before the first hit, followed by one line of surrounding context for
    every high-bit character found.

    Returns None; all results are written to standard output.
    """
    if os.path.isdir(top):
        if recurse:
            for name in os.listdir(top):
                walk(os.path.join(top, name))
        return

    _creator, filetype = MacOS.GetCreatorAndType(top)
    if filetype not in ('TEXT', '\0\0\0\0') or top.endswith(".hqx"):
        return

    # Close the file explicitly so handles are not left open while the
    # rest of the tree is being walked.
    fp = open(top)
    try:
        data = fp.read()
    finally:
        fp.close()

    # Heuristic binary check: count NULs and high-bit characters in the
    # first 256 characters only.
    badcount = 0
    for ch in data[:256]:
        if ord(ch) == 0 or ord(ch) >= 0x80:
            badcount = badcount + 1
    if badcount > 16:
        print("%s appears to be a binary file" % repr(top))
        return

    pos = 0
    gotone = 0
    while 1:
        m = xpat.search(data, pos)
        if m is None:
            break
        if not gotone:
            # Print the filename once, before its first hit.
            print(repr(top))
            gotone = 1
        i, j = m.span()
        # Clamp the start of the context window: a negative start index
        # would count from the end of the data and produce empty or
        # unrelated context for a hit within the first 15 characters.
        context = data[max(0, i - 15):j + 15]
        # Two leading spaces: same output as the former print statement,
        # which wrote " " and the context separated by a space.
        print("  " + context.replace('\n', ' '))
        pos = j
| 45 | |
def main():
    """Walk every path named on the command line, or a folder chosen by the user.

    When no command-line arguments are given, EasyDialogs.AskFolder() is
    called to obtain a folder; if it returns a false value nothing is
    walked.
    """
    targets = sys.argv[1:]
    if not targets:
        chosen = EasyDialogs.AskFolder()
        if not chosen:
            return
        targets = [chosen]
    for pathname in targets:
        walk(pathname)
Jack Jansen | 33a5d7e | 1999-02-03 12:07:14 +0000 | [diff] [blame] | 54 | |
| 55 | if __name__ == '__main__': |
| 56 | main() |
Jack Jansen | 33a5d7e | 1999-02-03 12:07:14 +0000 | [diff] [blame] | 57 | |