blob: ca18776a4c5d3bc44712fec9a2a6b5e4d276c561 [file] [log] [blame]
Guido van Rossum6a8f7b72003-01-01 14:41:25 +00001#! /usr/bin/env python
2
3"""Show file statistics by extension."""
4
Éric Araujo0d983b52012-07-02 17:45:10 -04005from __future__ import print_function
6
Guido van Rossum6a8f7b72003-01-01 14:41:25 +00007import os
8import sys
9
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label (a normalised file extension such as
    ".py", or one of the pseudo-extensions "<none>", "<dir>", "<lnk>",
    "<???>") to a dict of ``{counter name: integer count}``.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively, regular files are analysed, and
        anything else is reported on stderr and counted under "<???>".
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively collect statistics for the directory *dir*.

        Symbolic links are counted but never followed; CVS temp files
        (".#*") and Emacs backup files ("*~") are skipped entirely.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = sorted(os.listdir(dir))
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in names:
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            # Test for a link first: isdir() follows links, and following
            # them could loop forever or count files twice.
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect statistics for the single file *filename*.

        The file is always counted under "files".  If it cannot be opened or
        read it is additionally counted as "unopenable".  Files containing a
        NUL byte are counted as "binary" and get no line/word counts.
        """
        ext = os.path.splitext(filename)[1]
        if ext == os.path.basename(filename):
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            # The context manager closes the file even if read() fails.
            with open(filename, "rb") as f:
                data = f.read()
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to the counter *key* of the row *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as a right-aligned table on stdout.

        One row is printed per extension, followed by a "TOTAL" row, with a
        header line both above and below.  ``self.stats`` is left untouched,
        so the method may be called repeatedly and also works when nothing
        has been collected yet.
        """
        exts = sorted(self.stats)
        # The columns are the union of the counter names used by any row.
        cols = sorted(set(key for ext in exts for key in self.stats[ext]))
        minwidth = 6
        totals = {}
        # The label column must fit the header and the TOTAL row as well.
        colwidth = {"ext": max(len(label) for label in ["ext", "TOTAL"] + exts)}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            totals[col] = total
            widths = [len("%d" % value) for value in values]
            colwidth[col] = max([minwidth, len(col), len(str(total))] + widths)

        def printrow(label, cells):
            # Every cell is followed by one space, including the last one.
            print("%*s" % (colwidth["ext"], label), end=" ")
            for col in cols:
                print("%*s" % (colwidth[col], cells.get(col, "")), end=" ")
            print()

        header = dict((col, col) for col in cols)
        printrow("ext", header)
        for ext in exts:
            printrow(ext, self.stats[ext])
        printrow("TOTAL", totals)
        printrow("ext", header)  # Another header at the bottom
Guido van Rossum6a8f7b72003-01-01 14:41:25 +0000120
def main():
    """Report statistics for the paths named on the command line.

    When no path is given, the current directory is scanned instead.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()


if __name__ == "__main__":
    main()