PageRenderTime 88ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/util/find_copyrights.py

https://bitbucket.org/musleh123/gem5_cetus
Python | 273 lines | 253 code | 19 blank | 1 comment | 48 complexity | 002a84b5022f94579098e69b5b62f16b MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. #!/usr/bin/env python
  2. import os
  3. import re
  4. import sys
  5. from file_types import lang_type, find_files
  6. mode_line = re.compile('(-\*- *mode:.* *-\*-)')
  7. shell_comment = re.compile(r'^\s*#')
  8. lisp_comment = re.compile(r';')
  9. cpp_comment = re.compile(r'//')
  10. c_comment_start = re.compile(r'/\*')
  11. c_comment_end = re.compile(r'\*/')
  12. def find_copyright_block(lines, lang_type):
  13. start = None
  14. if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
  15. for i,line in enumerate(lines):
  16. if i == 0 and (line.startswith('#!') or mode_line.search(line)):
  17. continue
  18. if shell_comment.search(line):
  19. if start is None:
  20. start = i
  21. elif start is None:
  22. if line.strip():
  23. return
  24. else:
  25. yield start, i-1
  26. start = None
  27. elif lang_type in ('lisp', ):
  28. for i,line in enumerate(lines):
  29. if i == 0 and mode_line.search(line):
  30. continue
  31. if lisp_comment.search(line):
  32. if start is None:
  33. start = i
  34. elif start is None:
  35. if line.strip():
  36. return
  37. else:
  38. yield start, i-1
  39. start = None
  40. elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
  41. 'lex', 'yacc'):
  42. mode = None
  43. for i,line in enumerate(lines):
  44. if i == 0 and mode_line.search(line):
  45. continue
  46. if mode == 'C':
  47. assert start is not None, 'on line %d' % (i + 1)
  48. match = c_comment_end.search(line)
  49. if match:
  50. yield start, i
  51. mode = None
  52. continue
  53. cpp_match = cpp_comment.search(line)
  54. c_match = c_comment_start.search(line)
  55. if cpp_match:
  56. assert not c_match, 'on line %d' % (i + 1)
  57. if line[:cpp_match.start()].strip():
  58. return
  59. if mode is None:
  60. mode = 'CPP'
  61. start = i
  62. else:
  63. text = line[cpp_match.end():].lstrip()
  64. if text.startswith("Copyright") > 0:
  65. yield start, i-1
  66. start = i
  67. continue
  68. elif mode == 'CPP':
  69. assert start is not None, 'on line %d' % (i + 1)
  70. if not line.strip():
  71. continue
  72. yield start, i-1
  73. mode = None
  74. if not c_match:
  75. return
  76. if c_match:
  77. assert mode is None, 'on line %d' % (i + 1)
  78. mode = 'C'
  79. start = i
  80. if mode is None and line.strip():
  81. return
  82. else:
  83. raise AttributeError, "Could not handle language %s" % lang_type
  84. date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')
  85. def process_dates(dates):
  86. dates = [ d.strip() for d in dates.split(',') ]
  87. output = set()
  88. for date in dates:
  89. match = date_range_re.match(date)
  90. if match:
  91. f,l = [ int(d) for d in match.groups() ]
  92. for i in xrange(f, l+1):
  93. output.add(i)
  94. else:
  95. try:
  96. date = int(date)
  97. output.add(date)
  98. except ValueError:
  99. pass
  100. return output
  101. copyright_re = \
  102. re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-z-,. ]+)',
  103. re.DOTALL)
  104. authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-z .]+)\s*$')
  105. more_authors_re = re.compile(r'^[\s*#/]*([A-z .]+)\s*$')
  106. all_owners = set()
  107. def get_data(lang_type, lines):
  108. data = []
  109. last = None
  110. for start,end in find_copyright_block(lines, lang_type):
  111. joined = ''.join(lines[start:end+1])
  112. match = copyright_re.search(joined)
  113. if not match:
  114. continue
  115. c,dates,owner = match.groups()
  116. dates = dates.strip()
  117. owner = owner.strip()
  118. all_owners.add(owner)
  119. try:
  120. dates = process_dates(dates)
  121. except Exception:
  122. print dates
  123. print owner
  124. raise
  125. authors = []
  126. for i in xrange(start,end+1):
  127. line = lines[i]
  128. if not authors:
  129. match = authors_re.search(line)
  130. if match:
  131. authors.append(match.group(1).strip())
  132. else:
  133. match = more_authors_re.search(line)
  134. if not match:
  135. for j in xrange(i, end+1):
  136. line = lines[j].strip()
  137. if not line:
  138. end = j
  139. break
  140. if line.startswith('//'):
  141. line = line[2:].lstrip()
  142. if line:
  143. end = j - 1
  144. break
  145. break
  146. authors.append(match.group(1).strip())
  147. info = (owner, dates, authors, start, end)
  148. data.append(info)
  149. return data
  150. def datestr(dates):
  151. dates = list(dates)
  152. dates.sort()
  153. output = []
  154. def add_output(first, second):
  155. if first == second:
  156. output.append('%d' % (first))
  157. else:
  158. output.append('%d-%d' % (first, second))
  159. first = dates.pop(0)
  160. second = first
  161. while dates:
  162. next = dates.pop(0)
  163. if next == second + 1:
  164. second = next
  165. else:
  166. add_output(first, second)
  167. first = next
  168. second = next
  169. add_output(first, second)
  170. return ','.join(output)
  171. usage_str = """usage:
  172. %s [-v] <directory>"""
  173. def usage(exitcode):
  174. print usage_str % sys.argv[0]
  175. if exitcode is not None:
  176. sys.exit(exitcode)
  177. if __name__ == '__main__':
  178. import getopt
  179. show_counts = False
  180. ignore = set()
  181. verbose = False
  182. try:
  183. opts, args = getopt.getopt(sys.argv[1:], "ci:v")
  184. except getopt.GetoptError:
  185. usage(1)
  186. for o,a in opts:
  187. if o == '-c':
  188. show_counts = True
  189. if o == '-i':
  190. ignore.add(a)
  191. if o == '-v':
  192. verbose = True
  193. files = []
  194. for base in args:
  195. if os.path.isfile(base):
  196. files += [ (base, lang_type(base)) ]
  197. elif os.path.isdir(base):
  198. files += find_files(base)
  199. else:
  200. raise AttributeError, "can't access '%s'" % base
  201. copyrights = {}
  202. counts = {}
  203. for filename, lang in files:
  204. f = file(filename, 'r')
  205. lines = f.readlines()
  206. if not lines:
  207. continue
  208. lines = [ line.rstrip('\r\n') for line in lines ]
  209. lt = lang_type(filename, lines[0])
  210. try:
  211. data = get_data(lt, lines)
  212. except Exception, e:
  213. if verbose:
  214. if len(e.args) == 1:
  215. e.args = ('%s (%s))' % (e, filename), )
  216. print "could not parse %s: %s" % (filename, e)
  217. continue
  218. for owner, dates, authors, start, end in data:
  219. if owner not in copyrights:
  220. copyrights[owner] = set()
  221. if owner not in counts:
  222. counts[owner] = 0
  223. copyrights[owner] |= dates
  224. counts[owner] += 1
  225. info = [ (counts[o], d, o) for o,d in copyrights.items() ]
  226. for count,dates,owner in sorted(info, reverse=True):
  227. if show_counts:
  228. owner = '%s (%s files)' % (owner, count)
  229. print 'Copyright (c) %s %s' % (datestr(dates), owner)