PageRenderTime 47ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/documentation/doc_helper.py

http://github.com/msanders/autopy
Python | 371 lines | 362 code | 0 blank | 9 comment | 3 complexity | 5fe31177f08c1f376a9f35629f9618f0 MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. This is just a simple script I use to help extract the documentation I have
  5. written throughout the program. The documentation is by no means auto-generated
  6. -- I edit it by hand -- but having relevant portions intermixed with the code
  7. helps me keep it up-to-date.
  8. Essentially, this extracts comments I have interspersed throughout the source
  9. code and outputs them in a pseudo-XML format I deem as an "MMM" file, which I
  10. roughly define as follows:
  11. File syntax:
  12. <mmm type="(module|class|index)">
  13. <name>module name</name>
  14. <summary>...</summary> (optional)
  15. <description>...</description> (optional)
  16. <section name="Functions"> (optional)
  17. <function>
  18. <syntax>...</syntax>
  19. <retval>...</retval> (optional; otherwise assumed None)
  20. <arguments>...</arguments>
  21. <description>...</description>
  22. <exceptions> (optional; otherwise assumed none)
  23. <exception name="ExceptionName">...</exception>
  24. </exceptions>
  25. </function>
  26. </section>
  27. <section name="Constants"> (optional)
  28. <constant name="...">
  29. <value>...</value> (optional)
  30. <description>...</description> (optional)
  31. </constant>
  32. </section>
  33. </mmm>
  34. Attribute tags (enclosed in brackets):
  35. - keyword (e.g. integer, double)
  36. - const (e.g. True, False)
  37. - mono (anything you want to be monospaced)
  38. - note (formatted block note)
  39. - bcode (block code segment)
  40. MMM files can also contain HTML tags (as that is what they are intended to
  41. eventually be converted into), and they will be ignored.
  42. """
  43. import glob
  44. import os
  45. import re
  46. import textwrap
  47. from XMLGen import XMLGen
  48. from lxml import etree
  49. from sys import argv
  50. def itercat(*iterators):
  51. """Concatentate several iterators into one."""
  52. for i in iterators:
  53. for x in i:
  54. yield x
  55. def replace_range(str, start, end, repl):
  56. """Replaces range of characters in str with repl."""
  57. return str[:start] + repl + str[end:]
  58. def brace_expand(str):
  59. """Perform brace expansion, a lá bash."""
  60. match = re.search('{(.+?)(,.*?)?}', str)
  61. if match:
  62. strings = brace_expand(replace_range(str,
  63. match.start(),
  64. match.end(),
  65. match.group(1)))
  66. if match.group(2):
  67. strings.extend(brace_expand(replace_range(str,
  68. match.start(),
  69. match.end(),
  70. match.group(2)[1:])))
  71. return strings
  72. else: # No braces were in the string.
  73. return [str]
  74. def braceglob(pathname):
  75. """Returns iterator of brace expansion and globbing on files."""
  76. return itercat(*(glob.iglob(path) for path in brace_expand(pathname)))
  77. def get_comments(pathnames):
  78. """
  79. Extracts comments from given paths.
  80. Returns dictionary with pathnames as keys and a list of comments as values.
  81. """
  82. # Note that is NOT perfect: it matches certain edge cases such as
  83. # printf("hello /* word */ "), but we're just ignoring that, since it
  84. # doesn't happen in this project and it is not really worth implementing a
  85. # parser for this.
  86. get_comments.regex = re.compile(r'/\*\s*(.+?)\s*\*/', re.DOTALL)
  87. comments = {}
  88. for path in pathnames:
  89. f = open(path, 'r')
  90. comments[path] = (str for str in get_comments.regex.findall(f.read()))
  91. f.close()
  92. return comments
  93. def escape_for_xml(str):
  94. """Returns string with illegal XML entities escaped."""
  95. def replace_all(str, replacements):
  96. for item, repl in replacements.iteritems():
  97. str = str.replace(item, repl)
  98. return str
  99. escape_for_xml.replacements = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
  100. return replace_all(str, escape_for_xml.replacements)
  101. def format_comment(comment):
  102. """
  103. Returns a formatted comment, meaning:
  104. 1.) Extraneous whitespace is ignored.
  105. 2.) Special symbols (e.g. |...|, `...`, {% ... %}) and words (e.g.
  106. True, False) are given descriptive tags. These tags are to be used
  107. in an "MMM" file, a format I have invented solely for the purpose
  108. of this script.
  109. """
  110. def format_block(str):
  111. return textwrap.dedent(str) if '\n' in str else str.strip()
  112. def format_code_repl(match):
  113. return '\n<%s>%s</%s>' % ('bcode',
  114. format_block(match.group(1)),
  115. 'bcode')
  116. format_comment.needles = [(re.compile(r'\|(.+?)\|'), r'<var>\1</var>'),
  117. (re.compile(r'\b(integer|double|float|char|'
  118. r'Boolean|rect)(s?)\b'),
  119. r'<keyword>\1</keyword>\2'),
  120. (re.compile(r'\b(True|False|None)\b'),
  121. r'<const>\1</const>'),
  122. (re.compile('`(.+?)`'), r'<mono>\1</mono>'),
  123. (re.compile(r'\n[\t ]+'), ' '), # Extraneous whitespace
  124. (re.compile(r'^ ', re.M), '\n'), # Intentional linebreaks
  125. ]
  126. format_comment.code_regex = re.compile(r'{%(.*?)%}', re.DOTALL)
  127. formatted = escape_for_xml(comment.strip())
  128. formatted = sub_all_excluding_regex(formatted,
  129. format_comment.needles,
  130. format_comment.code_regex)
  131. formatted = format_comment.code_regex.sub(format_code_repl, formatted)
  132. return formatted
  133. def sub_all_excluding_regex(haystack, needles, exclude_regex):
  134. """
  135. Returns the string obtained by replacing each occurrence of regex in each
  136. (regex, repl) pair of "needles" not matched by "exclude_regex".
  137. Arguments:
  138. "haystack": The string to be searched.
  139. "needles": A list of tuples (regex, repl), where "regex" is a
  140. RegexObject, and "repl" is a string to be passed
  141. to re.sub().
  142. "exclude_regex": A RegexObject wherein nothing is to be replaced.
  143. This could surely be more efficient, but it's fast enough for what
  144. I'm doing.
  145. """
  146. excluded_ranges = []
  147. def rebuild_excluded_ranged(str):
  148. global excluded_ranges
  149. excluded_ranges = [m.span() for m in exclude_regex.finditer(str)]
  150. def in_exclude_range(x):
  151. """Returns True if x is within the range matched by exclude_regex."""
  152. for min, max in excluded_ranges:
  153. if x <= max:
  154. return x >= min
  155. return False
  156. new_haystack = haystack
  157. rebuild_excluded_ranged(new_haystack)
  158. for regex, repl in needles:
  159. start = 0
  160. while True:
  161. # Note that regex.finditer() doesn't work here because we are
  162. # modifying the haystack as we go along.
  163. match = regex.search(new_haystack, start)
  164. if match is None:
  165. break
  166. elif not in_exclude_range(match.start()):
  167. new_str = regex.sub(repl, match.group(0), 1)
  168. new_haystack = replace_range(new_haystack,
  169. match.start(), match.end(),
  170. new_str)
  171. start = match.start() + len(new_str) + 1
  172. # The string has been updated, so we need to rebuild our
  173. # search index.
  174. rebuild_excluded_ranged(new_haystack)
  175. else:
  176. start = match.end()
  177. return new_haystack
  178. def parse_args(comment):
  179. """
  180. Returns dictionary of formatted arguments, where the arguments are in the
  181. format:
  182. "Arguments: |foo| => some type,
  183. |bar| => some other type"
  184. """
  185. parse_args.regex = re.compile(r'\|(.+?)\| =>\s+(.+?)(,$|\Z)', re.S | re.M)
  186. args = {}
  187. for match in parse_args.regex.finditer(comment):
  188. args[match.group(1)] = format_comment(match.group(2))
  189. return args
  190. def parse_exceptions(comment):
  191. """
  192. Returns dictionary of formatted exceptions, where exceptions are in the
  193. format:
  194. "Raises: |Exception| if some reason or another,
  195. |SomeOtherException| if that other reason."
  196. (note that the "if" is required, and the period is optional.)
  197. """
  198. parse_exceptions.regex = re.compile(r'\|(.+?)\|\s+if\s+(.+?)(,$|.?\Z)',
  199. re.S | re.M)
  200. exceptions = {}
  201. for match in parse_exceptions.regex.finditer(comment):
  202. exceptions[match.group(1)] = format_comment(match.group(2))
  203. return exceptions
  204. def remove_extra_whitespace(str):
  205. return re.sub(r'\s{2,}', ' ', str)
  206. def parse_functions(comments):
  207. """Parses all the function comments in a module."""
  208. parse_functions.syntax_regex = \
  209. re.compile(r'Syntax:\s*((\S+)\(.*?\))( => (.*))?', re.DOTALL)
  210. functions = []
  211. parsing_function = False
  212. for comment in comments:
  213. if comment.startswith('Syntax:'): # Must be given first.
  214. parsing_function = True
  215. function = {}
  216. functions.append(function)
  217. match = parse_functions.syntax_regex.match(comment)
  218. if not match or not match.group(1):
  219. raise SyntaxError('Comment "%s" started with "Syntax:" but '\
  220. "didn't follow with function." % comment)
  221. function['name'] = match.group(2)
  222. function['syntax'] = remove_extra_whitespace(match.group(1))
  223. function['return'] = format_comment(match.group(4)
  224. if match.group(4) else "None")
  225. elif parsing_function:
  226. if comment.startswith('Arguments:'):
  227. function['args'] = parse_args(comment)
  228. elif comment.startswith('Description:'):
  229. function['description'] = format_comment(comment[12:])
  230. elif comment.startswith('Raises: '):
  231. function['exceptions'] = parse_exceptions(comment)
  232. parsing_function = False
  233. else:
  234. parsing_function = False
  235. return functions
  236. def parse_header(comments):
  237. """Parse header comments (attributes describing module)."""
  238. attributes = {}
  239. for comment in comments:
  240. if comment.startswith('Summary:'):
  241. attributes['summary'] = format_comment(comment[8:])
  242. elif comment.startswith('Description:'):
  243. attributes['description'] = format_comment(comment[12:])
  244. return attributes
  245. def get_module_attributes(comments):
  246. """
  247. Returns dictionary of module and class attributes from a dictionary of
  248. comments and filenames.
  249. """
  250. modules = {}
  251. for key in comments:
  252. try:
  253. match = re.search('(.*-(.*?)-module|py-(.*?-class))', key)
  254. module_name = match.group(2) if match.group(2) else match.group(3)
  255. except AttributeError:
  256. raise SyntaxError("Invalid module name (this shouldn't happen!): "
  257. '"%s"' % key)
  258. if key.endswith('.c'):
  259. module = modules.setdefault(module_name, {})
  260. module['functions'] = parse_functions(comments[key])
  261. elif key.endswith('.h'):
  262. attributes = parse_header(comments[key])
  263. modules.setdefault(module_name, {}).update(attributes)
  264. return modules
  265. def create_mmm_file(name, module):
  266. """
  267. Returns string describing an "MMM" psuedo-XML file using a module
  268. attributes dictionary.
  269. """
  270. def insert_function_section(xml, function):
  271. xml.push_tag('function', [('name', function['name'])]) # <function>
  272. xml.insert_tag('syntax', text=function['syntax'])
  273. for tagname in ['description']:
  274. if function.has_key(tagname):
  275. xml.insert_tag(tagname, text=function[tagname])
  276. if function.has_key('exceptions'):
  277. xml.push_tag('exceptions') # <exceptions>
  278. exceptions = function['exceptions']
  279. for exception_name in exceptions:
  280. xml.insert_tag('exception', [('name', exception_name)],
  281. text=exceptions[exception_name])
  282. xml.pop_tag() # </exceptions>
  283. xml.pop_tag() # </function>
  284. xml = XMLGen('<?xml version="1.0" encoding="UTF-8"?>')
  285. xml.push_tag('mmm',
  286. [('type', 'class' if 'class' in name else 'module')]) # <mmm>
  287. xml.insert_tag('name', text=name) # <name> ... </name>
  288. for tagname in 'summary', 'description':
  289. if module.has_key(tagname):
  290. xml.insert_tag(tagname, text=module[tagname])
  291. if module.has_key('functions'):
  292. xml.push_tag('section', [('name', 'Functions')]) # <section>
  293. for function in module['functions']:
  294. insert_function_section(xml, function)
  295. xml.pop_tag() # </section>
  296. xml.pop_tag() # </mmm>
  297. return xml.str
  298. def update_mmm_files(dir, modules):
  299. """
  300. Creates MMM files if they do not exist, and returns count of files written.
  301. """
  302. if not os.path.exists(dir):
  303. os.mkdir(dir)
  304. for module in modules:
  305. fpath = os.path.join(dir, module + '.mmm')
  306. f = open(fpath, 'r')
  307. f = open(fpath, 'w')
  308. f.write(create_mmm_file(module, modules[module]))
  309. f.close()
  310. return len(modules)
  311. def main(src_dir, out_dir):
  312. comments = get_comments(braceglob(os.path.join(src_dir,
  313. '{autopy-*-module,py-*-class}*.{c,h}')))
  314. modules = get_module_attributes(comments)
  315. files_written = update_mmm_files(out_dir, modules)
  316. if files_written:
  317. print '%d files written in "%s"' % (files_written, out_dir)
  318. else:
  319. print 'Uh... nothing happened.'
  320. if __name__ == '__main__':
  321. main(argv[1] if len(argv) > 1 else '../src',
  322. argv[2] if len(argv) > 2 else './mmm!')