/Tools/scripts/byext.py

http://unladen-swallow.googlecode.com/ · Python · 131 lines · 116 code · 11 blank · 4 comment · 30 complexity · 42dc6f8bc2cac43e59407331e3adbe55 MD5 · raw file

  1. #! /usr/bin/env python
  2. """Show file statistics by extension."""
  3. import os
  4. import sys
  5. class Stats:
  6. def __init__(self):
  7. self.stats = {}
  8. def statargs(self, args):
  9. for arg in args:
  10. if os.path.isdir(arg):
  11. self.statdir(arg)
  12. elif os.path.isfile(arg):
  13. self.statfile(arg)
  14. else:
  15. sys.stderr.write("Can't find %s\n" % arg)
  16. self.addstats("<???>", "unknown", 1)
  17. def statdir(self, dir):
  18. self.addstats("<dir>", "dirs", 1)
  19. try:
  20. names = os.listdir(dir)
  21. except os.error, err:
  22. sys.stderr.write("Can't list %s: %s\n" % (dir, err))
  23. self.addstats("<dir>", "unlistable", 1)
  24. return
  25. names.sort()
  26. for name in names:
  27. if name.startswith(".#"):
  28. continue # Skip CVS temp files
  29. if name.endswith("~"):
  30. continue# Skip Emacs backup files
  31. full = os.path.join(dir, name)
  32. if os.path.islink(full):
  33. self.addstats("<lnk>", "links", 1)
  34. elif os.path.isdir(full):
  35. self.statdir(full)
  36. else:
  37. self.statfile(full)
  38. def statfile(self, filename):
  39. head, ext = os.path.splitext(filename)
  40. head, base = os.path.split(filename)
  41. if ext == base:
  42. ext = "" # E.g. .cvsignore is deemed not to have an extension
  43. ext = os.path.normcase(ext)
  44. if not ext:
  45. ext = "<none>"
  46. self.addstats(ext, "files", 1)
  47. try:
  48. f = open(filename, "rb")
  49. except IOError, err:
  50. sys.stderr.write("Can't open %s: %s\n" % (filename, err))
  51. self.addstats(ext, "unopenable", 1)
  52. return
  53. data = f.read()
  54. f.close()
  55. self.addstats(ext, "bytes", len(data))
  56. if '\0' in data:
  57. self.addstats(ext, "binary", 1)
  58. return
  59. if not data:
  60. self.addstats(ext, "empty", 1)
  61. #self.addstats(ext, "chars", len(data))
  62. lines = data.splitlines()
  63. self.addstats(ext, "lines", len(lines))
  64. del lines
  65. words = data.split()
  66. self.addstats(ext, "words", len(words))
  67. def addstats(self, ext, key, n):
  68. d = self.stats.setdefault(ext, {})
  69. d[key] = d.get(key, 0) + n
  70. def report(self):
  71. exts = self.stats.keys()
  72. exts.sort()
  73. # Get the column keys
  74. columns = {}
  75. for ext in exts:
  76. columns.update(self.stats[ext])
  77. cols = columns.keys()
  78. cols.sort()
  79. colwidth = {}
  80. colwidth["ext"] = max([len(ext) for ext in exts])
  81. minwidth = 6
  82. self.stats["TOTAL"] = {}
  83. for col in cols:
  84. total = 0
  85. cw = max(minwidth, len(col))
  86. for ext in exts:
  87. value = self.stats[ext].get(col)
  88. if value is None:
  89. w = 0
  90. else:
  91. w = len("%d" % value)
  92. total += value
  93. cw = max(cw, w)
  94. cw = max(cw, len(str(total)))
  95. colwidth[col] = cw
  96. self.stats["TOTAL"][col] = total
  97. exts.append("TOTAL")
  98. for ext in exts:
  99. self.stats[ext]["ext"] = ext
  100. cols.insert(0, "ext")
  101. def printheader():
  102. for col in cols:
  103. print "%*s" % (colwidth[col], col),
  104. print
  105. printheader()
  106. for ext in exts:
  107. for col in cols:
  108. value = self.stats[ext].get(col, "")
  109. print "%*s" % (colwidth[col], value),
  110. print
  111. printheader() # Another header at the bottom
  112. def main():
  113. args = sys.argv[1:]
  114. if not args:
  115. args = [os.curdir]
  116. s = Stats()
  117. s.statargs(args)
  118. s.report()
  119. if __name__ == "__main__":
  120. main()