blob: 595551e6724c11441b4823bafa996bfb9e58cfd9 [file] [log] [blame]
"""findgremlins - Search through a folder and subfolders for
text files that have characters with bit 8 set, and print
the filename and a bit of context.

By Just, with a little glue by Jack"""
6
Jack Jansenb340acf2003-01-26 21:40:00 +00007import EasyDialogs
Jack Jansen33a5d7e1999-02-03 12:07:14 +00008import macfs
9import re
10import os
11import string
12import sys
13
# Matches a single character whose code is in the range 0200-0377 octal,
# i.e. any character with bit 8 set (a "gremlin").
xpat = re.compile(r"[\200-\377]")
15
def walk(top, recurse=1):
    """Report files at or below *top* that contain characters with bit 8 set.

    If *top* is a directory, every entry in it is walked recursively, but
    only when *recurse* is true; with a false *recurse* a directory is
    skipped entirely. Nested calls always use the default ``recurse=1``.

    If *top* is not a directory, it is examined only when macfs reports its
    file type as 'TEXT' and its name does not end in ".hqx". A file whose
    first 256 characters contain more than 16 NUL or high-bit characters is
    reported as binary and skipped. Otherwise the repr of the filename is
    printed once, followed by one line of context (up to 15 characters on
    either side, newlines shown as spaces) for every high-bit character.

    Returns None; all results are written to standard output.
    """
    if os.path.isdir(top):
        if recurse:
            for name in os.listdir(top):
                walk(os.path.join(top, name))
        return

    _creator, filetype = macfs.FSSpec(top).GetCreatorType()
    if filetype != 'TEXT' or top.endswith(".hqx"):
        return

    # Close the file explicitly instead of relying on garbage collection.
    fp = open(top)
    try:
        data = fp.read()
    finally:
        fp.close()

    # Heuristic: too many NUL/high-bit characters near the start means the
    # file is probably not text at all.
    badcount = 0
    for ch in data[:256]:
        if ord(ch) == 0 or ord(ch) >= 0x80:
            badcount = badcount + 1
    if badcount > 16:
        print("%s appears to be a binary file" % repr(top))
        return

    pos = 0
    gotone = 0
    while 1:
        m = xpat.search(data, pos)
        if m is None:
            break
        if not gotone:
            # Print the filename only once, before the first hit.
            print(repr(top))
            gotone = 1
        i, j = m.span()
        # Clamp at 0: a negative start would make the slice count from the
        # end of the data and lose the context for hits near the beginning.
        start = max(0, i - 15)
        print("  " + data[start:j + 15].replace('\n', ' '))
        pos = j
45
def main():
    """Ask the user to pick a folder and scan it for gremlins.

    Does nothing if the folder dialog returns a false value.
    """
    folder = EasyDialogs.AskFolder()
    if not folder:
        return
    walk(folder)
Jack Jansen33a5d7e1999-02-03 12:07:14 +000050
if __name__ == '__main__':
    main()
    # Exit with a non-zero status so we see the output.
    sys.exit(1)
54