blob: 138d8dd4dde1baf6bde014790bd5ed0a1e86e5fe [file] [log] [blame]
Guido van Rossum6a8f7b72003-01-01 14:41:25 +00001#! /usr/bin/env python
2
3"""Show file statistics by extension."""
4
5import os
6import sys
7
class Stats:
    """Collect per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label -- a normalized file extension, or one
    of the pseudo-extensions ``<none>``, ``<dir>``, ``<lnk>`` and ``<???>``
    -- to a dict that maps a counter name (for example ``"files"``,
    ``"bytes"`` or ``"lines"``) to its accumulated value.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Gather statistics for every path in *args*.

        Directories are handed to statdir(), paths accepted by
        os.path.isfile() to statfile().  Anything else is reported on
        stderr and counted under ``<???>`` / ``unknown``.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Count directory *dir* and recurse into its entries.

        Entries are visited in sorted order.  Symbolic links are counted
        under ``<lnk>`` and never followed.  A directory that cannot be
        listed is reported on stderr and counted as ``unlistable``.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = sorted(os.listdir(dir))
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in names:
            # Skip CVS temp files (".#*") and Emacs backup files ("*~").
            if name.startswith(".#") or name.endswith("~"):
                continue
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Analyze one file and add its counters to the extension's row.

        Every file counts towards ``files``.  A file that cannot be opened
        is reported on stderr and counted as ``unopenable``.  Otherwise its
        size is added to ``bytes``; content containing a NUL byte is counted
        as ``binary`` and not analyzed further, while other content
        contributes to ``lines`` and ``words`` (and ``empty`` when the file
        has no content at all).
        """
        ext = os.path.splitext(filename)[1]
        if ext == os.path.basename(filename):
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        # Keep the try body limited to open(): only a failure to open the
        # file is reported as "unopenable".
        try:
            f = open(filename, "rb")
        except OSError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        # The with-block closes the file even if read() raises.
        with f:
            data = f.read()
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating either as needed."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as a right-aligned table.

        One row is printed per extension (sorted), followed by a ``TOTAL``
        row; the column header is printed both above and below the rows.
        Counters a row does not have are left blank.

        The table is built from local data only, so ``self.stats`` is left
        untouched and report() may be called repeatedly, including when
        nothing has been collected yet.
        """
        exts = sorted(self.stats)
        # The column keys are the union of all counter names in use.
        cols = sorted({key for ext in exts for key in self.stats[ext]})
        totals = {}
        for col in cols:
            totals[col] = sum(self.stats[ext][col]
                              for ext in exts if col in self.stats[ext])
        # Rows are (label, counters) pairs; the totals row comes last.
        rows = [(ext, self.stats[ext]) for ext in exts]
        rows.append(("TOTAL", totals))

        minwidth = 6
        colwidth = {}
        for col in cols:
            widths = [len(str(counters[col]))
                      for _, counters in rows if col in counters]
            colwidth[col] = max([minwidth, len(col)] + widths)
        # The label column must fit the header and the TOTAL label as well
        # as every extension, otherwise short extensions misalign the table.
        extwidth = max(len(label)
                       for label in ["ext"] + [row[0] for row in rows])

        def printheader():
            print("%*s" % (extwidth, "ext"), end=" ")
            for col in cols:
                print("%*s" % (colwidth[col], col), end=" ")
            print()

        printheader()
        for label, counters in rows:
            print("%*s" % (extwidth, label), end=" ")
            for col in cols:
                print("%*s" % (colwidth[col], counters.get(col, "")), end=" ")
            print()
        printheader()  # Another header at the bottom
Guido van Rossum6a8f7b72003-01-01 14:41:25 +0000118
def main():
    """Report statistics for the paths given on the command line.

    When no paths are given, the current directory is used.
    """
    # An empty argument list is falsy, so fall back to the current directory.
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()
126
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()