PageRenderTime 547ms CodeModel.GetById 423ms app.highlight 16ms RepoModel.GetById 105ms app.codeStats 0ms

/Tools/scripts/byext.py

http://unladen-swallow.googlecode.com/
Python | 131 lines | 122 code | 7 blank | 2 comment | 9 complexity | 42dc6f8bc2cac43e59407331e3adbe55 MD5 | raw file
  1#! /usr/bin/env python
  2
  3"""Show file statistics by extension."""
  4
  5import os
  6import sys
  7
  8class Stats:
  9
 10    def __init__(self):
 11        self.stats = {}
 12
 13    def statargs(self, args):
 14        for arg in args:
 15            if os.path.isdir(arg):
 16                self.statdir(arg)
 17            elif os.path.isfile(arg):
 18                self.statfile(arg)
 19            else:
 20                sys.stderr.write("Can't find %s\n" % arg)
 21                self.addstats("<???>", "unknown", 1)
 22
 23    def statdir(self, dir):
 24        self.addstats("<dir>", "dirs", 1)
 25        try:
 26            names = os.listdir(dir)
 27        except os.error, err:
 28            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
 29            self.addstats("<dir>", "unlistable", 1)
 30            return
 31        names.sort()
 32        for name in names:
 33            if name.startswith(".#"):
 34                continue # Skip CVS temp files
 35            if name.endswith("~"):
 36                continue# Skip Emacs backup files
 37            full = os.path.join(dir, name)
 38            if os.path.islink(full):
 39                self.addstats("<lnk>", "links", 1)
 40            elif os.path.isdir(full):
 41                self.statdir(full)
 42            else:
 43                self.statfile(full)
 44
 45    def statfile(self, filename):
 46        head, ext = os.path.splitext(filename)
 47        head, base = os.path.split(filename)
 48        if ext == base:
 49            ext = "" # E.g. .cvsignore is deemed not to have an extension
 50        ext = os.path.normcase(ext)
 51        if not ext:
 52            ext = "<none>"
 53        self.addstats(ext, "files", 1)
 54        try:
 55            f = open(filename, "rb")
 56        except IOError, err:
 57            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
 58            self.addstats(ext, "unopenable", 1)
 59            return
 60        data = f.read()
 61        f.close()
 62        self.addstats(ext, "bytes", len(data))
 63        if '\0' in data:
 64            self.addstats(ext, "binary", 1)
 65            return
 66        if not data:
 67            self.addstats(ext, "empty", 1)
 68        #self.addstats(ext, "chars", len(data))
 69        lines = data.splitlines()
 70        self.addstats(ext, "lines", len(lines))
 71        del lines
 72        words = data.split()
 73        self.addstats(ext, "words", len(words))
 74
 75    def addstats(self, ext, key, n):
 76        d = self.stats.setdefault(ext, {})
 77        d[key] = d.get(key, 0) + n
 78
 79    def report(self):
 80        exts = self.stats.keys()
 81        exts.sort()
 82        # Get the column keys
 83        columns = {}
 84        for ext in exts:
 85            columns.update(self.stats[ext])
 86        cols = columns.keys()
 87        cols.sort()
 88        colwidth = {}
 89        colwidth["ext"] = max([len(ext) for ext in exts])
 90        minwidth = 6
 91        self.stats["TOTAL"] = {}
 92        for col in cols:
 93            total = 0
 94            cw = max(minwidth, len(col))
 95            for ext in exts:
 96                value = self.stats[ext].get(col)
 97                if value is None:
 98                    w = 0
 99                else:
100                    w = len("%d" % value)
101                    total += value
102                cw = max(cw, w)
103            cw = max(cw, len(str(total)))
104            colwidth[col] = cw
105            self.stats["TOTAL"][col] = total
106        exts.append("TOTAL")
107        for ext in exts:
108            self.stats[ext]["ext"] = ext
109        cols.insert(0, "ext")
110        def printheader():
111            for col in cols:
112                print "%*s" % (colwidth[col], col),
113            print
114        printheader()
115        for ext in exts:
116            for col in cols:
117                value = self.stats[ext].get(col, "")
118                print "%*s" % (colwidth[col], value),
119            print
120        printheader() # Another header at the bottom
121
def main():
    """Report statistics for the paths named on the command line.

    With no command-line arguments the current directory is examined.
    """
    # An empty argument list is falsy, so fall back to the current directory.
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()