#! /usr/bin/env python
# Source blob: adc925e59810eb4def809b418579c582085dccbb
# Last change: Guido van Rossum, commit 6a8f7b7, 2003-01-01 14:41:25 +0000

"""Show file statistics by extension."""
4
5import os
6import sys
7
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label to a dict of ``counter name -> int``.
    A row label is a file extension (e.g. ".py"), "" for files without an
    extension, or one of the pseudo-extensions "<dir>", "<lnk>" and
    "<???>" used for directories, symbolic links and unknown arguments.

    The code is written to run unchanged on Python 2.6+ and Python 3.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are handed to statdir(), regular files to statfile().
        Anything else is reported on stderr and counted as "unknown" in
        the "<???>" row.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                # Name the argument that could not be found.
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Count directory *dir* and visit its entries in sorted order.

        Symbolic links are counted in the "<lnk>" row and not followed;
        subdirectories are visited recursively; everything else goes to
        statfile().  A directory that cannot be listed is reported on
        stderr and counted as "unlistable" in the "<dir>" row.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            full = os.path.join(dir, name)
            # Test for links first so a link to a directory is not followed.
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, file):
        """Record statistics for the single file *file*.

        Always counts "files".  If the file can be read, also counts
        "bytes"; files containing a NUL byte are counted as "binary" and
        get no further counters.  Other files get "lines" and "words"
        (whitespace-separated) counts, plus "empty" when they have no
        content.  A file that cannot be opened or read is reported on
        stderr and counted as "unopenable".
        """
        ext = os.path.splitext(file)[1]
        if ext == os.path.basename(file):
            ext = ""  # .cvsignore is deemed not to have an extension
        self.addstats(ext, "files", 1)
        try:
            # The with-block closes the file even if read() fails.
            with open(file, "rb") as f:
                data = f.read()
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (file, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        # The file was opened in binary mode, so compare against a bytes
        # literal; a plain str would raise TypeError on Python 3.
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics to stdout as an aligned table.

        One header line, one line per row label (sorted), and a final
        "TOTAL" line.  Columns are the sorted union of all counter names;
        a counter a row does not have is shown as blank.  Cells are
        right-aligned and separated by a single space.
        """
        exts = sorted(self.stats)
        # Get the column keys
        columns = {}
        for ext in exts:
            columns.update(self.stats[ext])
        cols = sorted(columns)

        totals = {}
        for ext in exts:
            for col, value in self.stats[ext].items():
                totals[col] = totals.get(col, 0) + value

        minwidth = 7
        # Include the fixed labels so short extension sets stay aligned
        # and an empty collector does not call max() on an empty list.
        extwidth = max([len(label) for label in exts + ["ext", "TOTAL"]])
        # Widen a column when any of its numbers needs more room than
        # the header or the minimum width.
        widths = {}
        for col in cols:
            numbers = [self.stats[ext][col]
                       for ext in exts if col in self.stats[ext]]
            numbers.append(totals[col])
            digits = max([len("%d" % number) for number in numbers])
            widths[col] = max(minwidth, len(col), digits)

        cells = ["%*s" % (extwidth, "ext")]
        for col in cols:
            cells.append("%*s" % (widths[col], col))
        print(" ".join(cells))

        for ext in exts:
            cells = ["%*s" % (extwidth, ext)]
            for col in cols:
                value = self.stats[ext].get(col)
                if value is None:
                    s = ""
                else:
                    s = "%d" % value
                cells.append("%*s" % (widths[col], s))
            print(" ".join(cells))

        cells = ["%*s" % (extwidth, "TOTAL")]
        for col in cols:
            cells.append("%*s" % (widths[col], totals[col]))
        print(" ".join(cells))
106
def main():
    """Collect and print statistics for the command-line paths.

    Falls back to the current directory when no paths are given.
    """
    paths = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(paths)
    collector.report()
114
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()