blob: e5b090c8e507f17cb759d3d237fe10be8ec15378 [file] [log] [blame]
Guido van Rossum7c77f752007-09-27 22:39:12 +00001#! /usr/bin/env python3.0
Guido van Rossum6a8f7b72003-01-01 14:41:25 +00002
3"""Show file statistics by extension."""
4
5import os
6import sys
7
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label to a dict of counter name -> integer.
    A row label is either a normalized file extension (e.g. ``.py``) or one
    of the pseudo-extensions ``<none>`` (file without extension), ``<dir>``
    (directories), ``<lnk>`` (symbolic links) and ``<???>`` (arguments that
    are neither a file nor a directory).
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively and regular files are examined
        directly.  Anything else is reported on stderr and counted as
        "unknown" in the ``<???>`` row.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively collect statistics for the directory *dir*.

        The directory itself is counted under ``<dir>``.  If it cannot be
        listed, the error is written to stderr and it is also counted as
        "unlistable".  Symbolic links are counted under ``<lnk>`` and are
        not followed.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = sorted(os.listdir(dir))
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in names:
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect statistics for the single file *filename*.

        Counts the file and its size in bytes under its extension.  Files
        containing a NUL byte are counted as "binary" and not analysed any
        further; all other files also contribute line and word counts.
        Files that cannot be opened or read are reported on stderr and
        counted as "unopenable".
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.basename(filename)
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            # The context manager closes the file even if read() fails.
            with open(filename, "rb") as f:
                data = f.read()
        except (IOError, OSError) as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        # latin-1 maps every byte to a character, so decoding cannot fail.
        lines = str(data, "latin-1").splitlines()
        self.addstats(ext, "lines", len(lines))
        words = data.split()
        self.addstats(ext, "words", len(words))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as an aligned table on stdout.

        One row is printed per label in sorted order, followed by a
        ``TOTAL`` row; the header line is printed above and below the
        table.  Counters a row does not have are left blank.

        ``self.stats`` is not modified, so the report can be printed more
        than once, and an empty collection prints just the header and an
        empty ``TOTAL`` row.
        """
        exts = sorted(self.stats)
        # Every counter name used by any row becomes a column.
        cols = sorted({key for ext in exts for key in self.stats[ext]})
        minwidth = 6
        totals = {}
        # The label column must also fit the header and the TOTAL label.
        colwidth = {"ext": max(len(label) for label in exts + ["TOTAL", "ext"])}
        for col in cols:
            values = [self.stats[ext][col] for ext in exts
                      if col in self.stats[ext]]
            total = sum(values)
            totals[col] = total
            widths = [len("%d" % value) for value in values]
            colwidth[col] = max(widths + [minwidth, len(col), len(str(total))])

        header = ["ext"] + cols
        # Build the printable rows from copies so self.stats stays untouched.
        rows = [dict(self.stats[ext], ext=ext) for ext in exts]
        rows.append(dict(totals, ext="TOTAL"))

        def printrow(cells):
            for col in header:
                print("%*s" % (colwidth[col], cells.get(col, "")), end=' ')
            print()

        titles = dict((col, col) for col in header)
        printrow(titles)
        for row in rows:
            printrow(row)
        printrow(titles)  # Another header at the bottom
Guido van Rossum6a8f7b72003-01-01 14:41:25 +0000119
def main():
    """Report statistics for the paths named on the command line.

    With no command-line arguments the current directory is examined.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()
127
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()