PageRenderTime 97ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/impl/docgen/docstract.py

http://github.com/mozilla/chromeless
Python | 911 lines | 724 code | 89 blank | 98 comment | 130 complexity | d3b9efbfcbc3985ac0eba664c215ef2e MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception
  1. #!/usr/bin/env python
  2. #
  3. # Copyright (c) 2011, Lloyd Hilaiel <lloyd@hilaiel.com>
  4. #
  5. # Permission to use, copy, modify, and/or distribute this software for any
  6. # purpose with or without fee is hereby granted, provided that the above
  7. # copyright notice and this permission notice appear in all copies.
  8. #
  9. # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10. # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11. # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12. # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13. # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14. # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15. # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. import re
  17. import os
  18. import types
  19. class DocStract():
  20. class Type():
  21. def __init__(self, val, orig):
  22. val.pop('source_lines')
  23. if (val.has_key('name') and len(val) == 1):
  24. self.value = val['name']
  25. else:
  26. self.value = val
  27. self.orig = orig
  28. def __repr__(self):
  29. return self.orig
  30. def __init__(self):
  31. # the patterns for finding and processing documentation blocks (and the source line
  32. # Note: these two patterns are identical, except the latter captures groups. The
  33. # first is used to split a source file into chunks of text which are either doc blocks
  34. # or source code, the second extracts information from doc blocks.
  35. self.docBlockFindPat = re.compile('(/\*\* .*? \*/ (?:[\s\n]*[^\/\n]*)?)', re.S | re.X)
  36. self.docBlockProcessPat = re.compile('(/\*\*)(.*?)(\*/)( [\s\n]*[^\/\n]*)? ', re.S | re.X)
  37. # after extracting the comment, fix it up (remove *s and leading spaces)
  38. self.blockFilterPat = re.compile('^\s*\* ?', re.M)
  39. # '@' can be escaped with an '@', i.e. @@function when occurs in text blocks
  40. # will not be eaten by the parser. This pattern is used to unescape text
  41. # blocks.
  42. self.unescapeTagPat = re.compile('@@(?=\w+)', re.M)
  43. # the pattern used to split a comment block to create our token stream.
  44. # The token stream consists of:
  45. # * tags: "@tagname"
  46. # * types: "{typename [optional text content]}
  47. # * text: "freeform text that's not either of the above"
  48. #
  49. # This pattern checks for either of the top two, and is applied using
  50. # .split() which handles the third.
  51. self.tokenizePat = re.compile(r'''
  52. (?:
  53. (?<![@\w]) (@\w+) # a tag that's not preceeded by an @ sign (escape)
  54. )
  55. |
  56. (?:
  57. (?<!\\)
  58. ({\w
  59. (?:[^{}]+|
  60. (?:{
  61. (?:[^{}]+|
  62. (?:{[^{}]}) # a regex is the wrong tool for the job, support
  63. # 3 levels of nested curlies.
  64. )*
  65. })
  66. )*
  67. })
  68. )
  69. ''', re.M | re.X);
  70. self.markerPat = re.compile(r'''^ (?<! [@\w] ) ( @ \w+ ) $''', re.M | re.X)
  71. # block types. Each document block is of one of these types.
  72. self.blockTypes = {
  73. '@constructor': ConstructorBlockHandler("@constructor"),
  74. '@function': FunctionBlockHandler("@function"),
  75. '@module': ModuleBlockHandler("@module"),
  76. '@property': PropertyBlockHandler("@property"),
  77. '@class': ClassBlockHandler("@class"),
  78. '@endclass': EndClassBlockHandler("@endclass"),
  79. '@typedef': TypedefBlockHandler("@typedef"),
  80. '@endtypedef': EndTypedefBlockHandler("@endtypedef"),
  81. '@event': EventBlockHandler('@event')
  82. }
  83. # tag aliases, direct equivalences. Note, RHS is normal form.
  84. self.aliases = {
  85. '@func': '@function',
  86. '@params': '@param',
  87. '@parameter': '@param',
  88. '@parameters': '@param',
  89. '@argument': '@param',
  90. '@arg': '@param',
  91. '@prop': '@property',
  92. '@returns': '@return',
  93. '@description': '@desc',
  94. '@seealso': '@see',
  95. '@see_also': '@see',
  96. '@beginclass': '@class',
  97. '@begin_class': '@class',
  98. '@end_class': '@endclass',
  99. '@throw': '@throws',
  100. '@exception': '@throws'
  101. }
  102. # lookup table of tag handlers, lil' object that can parse and inject
  103. # for different tags.
  104. self.tags = {
  105. '@param': ParamTagHandler('@param'),
  106. '@desc': DescTagHandler('@desc'),
  107. '@return': ReturnTagHandler('@return'),
  108. '@see': SeeTagHandler('@see'),
  109. '@throws': ThrowsTagHandler('@throws'),
  110. '@type': TypeTagHandler('@type'),
  111. '@payload': PayloadTagHandler('@type')
  112. }
  113. # these are a list of functions that examine extraction state and try to guess
  114. # what type of construct a documentation block documents
  115. self.typeGuessers = [
  116. hasGetSetterIsPropertyTypeGuesser,
  117. isFunctionIfKeywordInCodeTypeGuesser,
  118. firstBlockIsModuleTypeGuesser,
  119. assignmentIsProbablyPropertyTypeGuesser,
  120. typeWithoutReturnsIsProbablyPropertyTypeGuesser
  121. ]
  122. # these are a list of functions that, given a block type and subsequent chunk of code,
  123. # try to guess the name of the construct being documented
  124. self.nameGuessers = [
  125. standardFunctionNameGuesser,
  126. getSetterNameGuesser,
  127. objectPropertyNameGuesser,
  128. commonJSNameGuesser,
  129. assignToPropertyNameGuesser
  130. ]
  131. def _isMarker(self, tok):
  132. return type(tok) == types.StringType and self.markerPat.match(tok)
  133. def _popNonMarker(self, toks):
  134. nxt = None
  135. if (len(toks) == 0):
  136. return None
  137. if not self._isMarker(self._peekTok(toks)):
  138. nxt = toks.pop(0)
  139. return nxt
  140. def _peekTok(self, toks):
  141. if (len(toks)):
  142. return toks[0]
  143. return None
  144. def _consumeToks(self, tokens, currentObj):
  145. cur = tokens.pop(0)
  146. handler = None
  147. # is this a blocktype declaration?
  148. if cur in self.blockTypes:
  149. handler = self.blockTypes[cur]
  150. if currentObj['blockHandler']:
  151. raise RuntimeError("%s and %s may " %
  152. (currentObj['blockHandler'].tagName,
  153. handler.tagName) +
  154. "not occur in same documentation block")
  155. currentObj['blockHandler'] = handler
  156. elif cur in self.tags:
  157. handler = self.tags[cur]
  158. # now let's gather together all the arguments (non-tags)
  159. args = [ ]
  160. while len(tokens) > 0 and not self._isMarker(self._peekTok(tokens)):
  161. t = tokens.pop(0)
  162. args.append(t)
  163. # do we have a handler for this tag?
  164. if not handler == None:
  165. arg = None
  166. # get argument if required
  167. if handler.takesArg:
  168. if len(args) == 0 and not handler.argOptional:
  169. raise RuntimeError("%s tag requires an argument" % cur)
  170. elif not len(args) == 0:
  171. raise RuntimeError("no arguments allowed to %s tag" % cur)
  172. ctx = handler.parse(args)
  173. if handler.mayRecur:
  174. if cur not in currentObj["tagData"]:
  175. currentObj["tagData"][cur] = []
  176. currentObj["tagData"][cur].append(ctx)
  177. else:
  178. if cur in currentObj["tagData"]:
  179. raise RuntimeError("%s tag may not occur multiple times in the same documentation block" % cur)
  180. currentObj["tagData"][cur] = ctx
  181. # ooops. Dunno what that is!
  182. else:
  183. raise RuntimeError("unrecognized tag: %s" % cur)
  184. def _guessBlockName(self, codeChunk, blockType):
  185. # given the first line of source code after the block, and it's type
  186. # we'll invoke our name guessers to try to figure out the name of the
  187. # construct being documented
  188. # now let's invoke our type guessers, in order
  189. for func in self.nameGuessers:
  190. t = func(codeChunk, blockType)
  191. if t != None:
  192. return t
  193. return None
  194. def _guessBlockType(self, firstBlock, codeChunk, context, tags):
  195. # first we'll prune possibilities by figuring out which supported blocktypes
  196. # are valid in the current context, and support all of the required tags
  197. tagSet = set(tags)
  198. possibilities = [ ]
  199. for bt in self.blockTypes:
  200. bt = self.blockTypes[bt]
  201. if context not in bt.allowedContexts:
  202. continue
  203. if not tagSet.issubset(bt.allowedTags):
  204. continue
  205. possibilities.append(bt.tagName)
  206. # if we've reduced to exactly one possibility, then we don't need to guess
  207. if len(possibilities) == 1:
  208. return possibilities[0]
  209. # now let's invoke our type guessers, in order
  210. for func in self.typeGuessers:
  211. t = func(firstBlock, codeChunk, context, tags, possibilities)
  212. if t != None:
  213. return t
  214. raise RuntimeError("Can't determine what this block documents (from %s)" % ", ".join(possibilities))
  215. def _whatContext(self, stack):
  216. return stack[-1][0]
  217. def _analyzeBlock(self, block, codeChunk, firstBlock, stack, lineStart, lineEnd):
  218. # Ye' ol' block analysis process. block at this point contains
  219. # a chunk of text that has already had comment markers stripped out.
  220. # Step 1: split the chunk of text into a token stream, each token
  221. # is either a tag /@\w+/ or a chunk of text (tag argument).
  222. # whitespace on either side of tokens is stripped. Also, unescape
  223. # @@tags.
  224. tokens = self.tokenizePat.split(block)
  225. tokens = [n for n in tokens if not n == None]
  226. tokens = [n.lstrip(" \t").lstrip('\r\n').rstrip() for n in tokens if n.strip()]
  227. tokens = [self.unescapeTagPat.sub("@", t) for t in tokens]
  228. # Step 3: Treat initial text as if it were a description.
  229. if not self._isMarker(tokens[0]):
  230. tokens.insert(0, '@desc')
  231. # Step 4: collapse aliases
  232. tokens = [self.aliases[n] if self.aliases.has_key(n) else n for n in tokens]
  233. # "autosplitting". this feature allows multiple blocks to reside in the
  234. # same documentation block (/** */)
  235. tokenGroups = []
  236. while len(tokens):
  237. i = 1
  238. while i < len(tokens):
  239. if (tokens[i] in self.blockTypes):
  240. break
  241. i += 1
  242. tokenGroups.append(tokens[:i])
  243. tokens = tokens[i:]
  244. for tokens in tokenGroups:
  245. # Step 2: initialize an object which will hold the intermediate
  246. # representation of parsed block data.
  247. parseData = {
  248. 'blockHandler': None,
  249. 'tagData': { }
  250. }
  251. # Step 4.5: depth first recursion for inline type parsing
  252. newtoks = []
  253. for t in tokens:
  254. if len(t) >= 2 and t[0:1] == '{' and t[-1:] == '}':
  255. stack.append( ('embedded', {}) )
  256. tokObj = { }
  257. t2 = "@typedef " + t[1:-1]
  258. self._analyzeBlock(t2, codeChunk, False, stack, lineStart, lineEnd)
  259. self._analyzeBlock("@endtypedef", codeChunk, False, stack, lineStart, lineEnd)
  260. newtoks.append(DocStract.Type(stack[-1][1]['val'], t))
  261. stack.pop()
  262. else:
  263. newtoks.append(t)
  264. tokens = newtoks
  265. # Step 5: parse all tokens from the token stream, populating the
  266. # output representation as we go.
  267. while len(tokens):
  268. self._consumeToks(tokens, parseData)
  269. thisContext = self._whatContext(stack)
  270. # Step 6: Heuristics! Apply a set of functions which use the current state of
  271. # documentation extractor and some source code to figure out what
  272. # type of construct (@function, @property, etc) this documentation
  273. # block is documenting, and what its name is.
  274. # only invoke guessing logic if type wasn't explicitly declared
  275. if parseData['blockHandler'] == None:
  276. guessedType = self._guessBlockType(firstBlock, codeChunk, thisContext, parseData['tagData'].keys())
  277. if guessedType not in self.blockTypes:
  278. raise RuntimeError("Don't know how to handle a '%s' documentation block" % guessedType)
  279. parseData['blockHandler'] = self.blockTypes[guessedType]
  280. # always try to guess the name, a name guesser has the first interesting line of code
  281. # after the documentation block and the type of block (it's string name) to work with
  282. guessedName = self._guessBlockName(codeChunk, parseData['blockHandler'].tagName)
  283. # Step 7: Validation phase! Not all tags are allowed in all types of
  284. # documentation blocks. like '@returns' inside a '@classend' block
  285. # would just be nutty. let's scrutinize this block to make sure it's
  286. # sane.
  287. # first check that this doc block type is valid in present context
  288. if thisContext not in parseData['blockHandler'].allowedContexts:
  289. raise RuntimeError("%s not allowed in %s context" %
  290. (parseData['blockHandler'].tagName,
  291. thisContext))
  292. # now check that all present tags are allowed in this block
  293. for tag in parseData['tagData']:
  294. if not tag == parseData['blockHandler'].tagName and tag not in parseData['blockHandler'].allowedTags:
  295. raise RuntimeError("%s not allowed in %s block" %
  296. (tag, parseData['blockHandler'].tagName))
  297. # Step 8: Generation of output document
  298. doc = { }
  299. for tag in parseData['tagData']:
  300. val = parseData['tagData'][tag]
  301. if not type(val) == types.ListType:
  302. val = [ val ]
  303. for v in val:
  304. handler = self.tags[tag] if tag in self.tags else self.blockTypes[tag]
  305. handler.attach(v, doc, parseData['blockHandler'].tagName)
  306. parseData['blockHandler'].setLineNumber(lineStart, lineEnd, doc)
  307. # special case for classes and typedefs
  308. if parseData['blockHandler'].tagName in ('@endclass', '@endtypedef'):
  309. doc = stack.pop()[1]
  310. parseData['blockHandler'].merge(doc, stack[-1][1], guessedName, self._whatContext(stack))
  311. if parseData['blockHandler'].tagName == '@class':
  312. stack.append( ('class', doc) )
  313. elif parseData['blockHandler'].tagName == '@typedef':
  314. stack.append( ('type', doc) )
  315. def extractFromFile(self, filename):
  316. # next read the whole file into memory
  317. contents = ""
  318. with open(filename, "r") as f:
  319. contents = f.read()
  320. data = self.extract(contents)
  321. # first determine the module name, it's always the same as the file name
  322. mod = os.path.basename(filename)
  323. dotLoc = mod.rfind(".")
  324. if (dotLoc > 0):
  325. mod = mod[:dotLoc]
  326. if not "module" in data:
  327. data["module"] = mod
  328. if not "filename" in data:
  329. data["filename"] = filename
  330. return data
  331. def extract(self, contents):
  332. # clear the lil' context flag that lets us know when we're parsing
  333. # classes (class definitions cannot span files)
  334. stack = [ ( 'global', {} ) ]
  335. # now parse out and combine comment blocks
  336. firstBlock = True
  337. line = 0
  338. for text in self.docBlockFindPat.split(contents):
  339. lineStart = line + 1
  340. line += text.count('\n')
  341. # if this isn't a documentation block, carry on
  342. m = self.docBlockProcessPat.match(text)
  343. if m:
  344. block = self.blockFilterPat.sub("", m.group(2)).strip()
  345. context = m.group(4).strip()
  346. # data will be mutated!
  347. try:
  348. self._analyzeBlock(block, context, firstBlock, stack, lineStart, line)
  349. except RuntimeError, exc:
  350. args = exc.args
  351. if not args:
  352. arg0 = ''
  353. else:
  354. arg0 = args[0]
  355. arg0 += ' at line %s' % lineStart
  356. exc.args = (arg0,) + args[1:]
  357. raise
  358. firstBlock = False
  359. return stack[0][1]
  360. # begin definition of Tag Handler classes.
  361. # TagHandler is the base class for a handler of tags. This is an
  362. # object that is capable of parsing tags and merging them into
  363. # the output JSON document.
  364. class TagHandler(object):
  365. # if takesArg is true, then text may occur after the tag
  366. # (it "accepts" a single text blob as an argument)
  367. takesArg = False
  368. # if takesArg is True, argOptional specifies whether the
  369. # argument is required
  370. argOptional = False
  371. # if mayRecur is True the tag may be specified multiple times
  372. # in a single document text blob.
  373. mayRecur = False
  374. def __init__(self, tagname):
  375. self.tagName = tagname
  376. # the parse method attempts to parse the text blob and returns
  377. # any representation of it that it likes. This method should throw
  378. # if there's a syntactic error in the text argument. text may be
  379. # 'None' if the tag accepts no argument.
  380. def parse(self, args):
  381. return " ".join([str(a) for a in args]) if len(args) > 0 else None
  382. # attach merges the results of parsing a tag into the output
  383. # JSON document for a documentation block. `obj` is the value
  384. # returned by parse(), and parent is the json document that
  385. # the function should mutate
  386. def attach(self, obj, parent, blockType):
  387. parent[self.tagName[1:]] = obj
  388. # utility function for determining if an argument is a type
  389. def _isType(self, arg):
  390. return isinstance(arg, DocStract.Type)
  391. # utility function for rendering arguments
  392. def _argPrint(self, args):
  393. return ("(" + str(len(args)) + "): ") + " | ".join([str(x)[:10] for x in args])[:40] + "..."
  394. class ParamTagHandler(TagHandler):
  395. mayRecur = True
  396. takesArg = True
  397. _nameAndDescPat = re.compile('^([\w.\[\]]+)?\s*(.*)$', re.S);
  398. # a pattern used to detect & strip optional brackets
  399. _optionalPat = re.compile('^\[(.*)\]$')
  400. def parse(self, args):
  401. p = { }
  402. # collapse two arg case into one arg
  403. if (len(args) == 2 and self._isType(args[0])):
  404. p['type'] = args[0].value
  405. args = args[1:]
  406. if len(args) == 1:
  407. m = self._nameAndDescPat.match(args[0])
  408. if not m or self._isType(args[0]):
  409. raise RuntimeError("Malformed args to %s: %s" %
  410. (self.tagName, self._argPrint(args)))
  411. if m.group(1):
  412. p['name'] = m.group(1)
  413. if m.group(2):
  414. p['desc'] = m.group(2)
  415. elif len(args) == 2:
  416. # @param name {type}
  417. if self._isType(args[0]) or not self._isType(args[1]):
  418. raise RuntimeError("Malformed args to %s: %s" %
  419. (self.tagName, self._argPrint(args)))
  420. p['name'] = args[0]
  421. p['type'] = args[1].value
  422. elif len(args) == 3:
  423. # this is
  424. # @param name {type} desc
  425. if self._isType(args[0]) or not self._isType(args[1]) or self._isType(args[2]):
  426. raise RuntimeError("Malformed args to %s: %s" %
  427. (self.tagName, self._argPrint(args)))
  428. p['name'] = args[0]
  429. p['type'] = args[1].value
  430. p['desc'] = args[2]
  431. else:
  432. raise RuntimeError("Malformed args to %s: %s" %
  433. (self.tagName, self._argPrint(args)))
  434. return p
  435. def _handleOptionalSyntax(self, obj):
  436. # handle optional syntax: [name]
  437. if ('name' in obj):
  438. m = self._optionalPat.match(obj['name'])
  439. if m:
  440. obj['name'] = m.group(1)
  441. obj['optional'] = True
  442. def attach(self, obj, current, blockType):
  443. self._handleOptionalSyntax(obj)
  444. if not 'params' in current:
  445. current['params'] = [ ]
  446. current['params'].append(obj)
  447. class SeeTagHandler(TagHandler):
  448. takesArg = True
  449. mayRecur = True
  450. def attach(self, obj, current, blockType):
  451. if not 'see' in current:
  452. current['see'] = [ ]
  453. current['see'].append(obj)
  454. class DescTagHandler(TagHandler):
  455. takesArg = True
  456. mayRecur = True
  457. def attach(self, obj, current, blockType):
  458. if 'desc' in current:
  459. current['desc'] = current['desc'] + "\n\n" + obj
  460. else:
  461. current['desc'] = obj
  462. class ReturnTagHandler(TagHandler):
  463. takesArg = True
  464. _pat = re.compile('^\s*(?:{(\w+)})?\s*(.*)$', re.S);
  465. def parse(self, args):
  466. rv = { }
  467. for a in args:
  468. if self._isType(a):
  469. if 'type' in rv:
  470. raise RuntimeError("Return type multiply decalared")
  471. rv['type'] = a.value
  472. else:
  473. if 'desc' in rv:
  474. raise RuntimeError("Bogus arguments to %s: %s" %
  475. (self.tagName, self._argPrint(args)))
  476. rv['desc'] = a
  477. return rv
  478. def attach(self, obj, current, blockType):
  479. # The only way this can occur (returns already defined) is if
  480. # someone added an extension that behaves badly, or if @type and
  481. # @returns occur in the same block.
  482. if 'returns' in current:
  483. for k in current['returns']:
  484. if k in obj:
  485. raise RuntimeError("Return %s redefined (@type and @returns in " % k +
  486. "same function block?)")
  487. else:
  488. current['returns'] = {}
  489. for k in obj:
  490. current['returns'][k] = obj[k]
  491. class TypeTagHandler(TagHandler):
  492. takesArg = True
  493. _isWordPat = re.compile('^\w+$', re.S);
  494. def parse(self, args):
  495. if len(args) > 1:
  496. raise RuntimeError("Bogus arguments to %s: %s" %
  497. (self.tagName, self._argPrint(args)))
  498. if self._isType(args[0]):
  499. args[0] = args[0].value.strip()
  500. else:
  501. m = self._isWordPat.match(args[0])
  502. if not m:
  503. raise RuntimeError("Bogus argument to %s: %s" % (self.tagName, args[0]))
  504. return args[0]
  505. # type is special. it means different things
  506. # when it occurs in a '@property' vs. a '@function'
  507. # context. in the former it's the property type, in
  508. # the later, it's an alias for '@return'
  509. def attach(self, obj, current, blockType):
  510. if (blockType == '@property'):
  511. current['type'] = obj
  512. else:
  513. if 'returns' not in current:
  514. current['returns'] = { }
  515. if 'type' in current['returns']:
  516. raise RuntimeError("Return type redefined (@type and @returns in " +
  517. "same function block?)")
  518. current['returns']['type'] = obj
  519. class ThrowsTagHandler(ReturnTagHandler):
  520. mayRecur = True
  521. def attach(self, obj, current, blockType):
  522. if 'throws' not in current:
  523. current['throws'] = [ ]
  524. current['throws'].append(obj)
  525. class PayloadTagHandler(ReturnTagHandler):
  526. mayRecur = False
  527. def attach(self, obj, current, blockType):
  528. if 'payload' in current:
  529. raise RuntimeError("an event can't have multiple payloads");
  530. current['payload'] = obj
  531. # a block handler is slightly different than a tag
  532. # handler. Each document block is of a certain type,
  533. # it describes *something*. Block handlers do
  534. # everything that TagHandlers do, but also:
  535. # * one block handler per code block, they're mutually
  536. # exclusive (a docblock can't describe a *function*
  537. # AND a *property*)
  538. # * express what tags may occur inside of them
  539. # * express what contexts they may occur in ('global'
  540. # and 'class' are the only two meaninful contexts at
  541. # present).
  542. class BlockHandler(TagHandler):
  543. allowedTags = [ ]
  544. allowedContexts = [ 'global', 'class' ]
  545. def merge(self, doc, parent, guessedName, context):
  546. for k in doc:
  547. parent[k] = doc[k]
  548. def setLineNumber(self, lineStart, lineEnd, doc):
  549. doc['source_lines'] = [ lineStart, lineEnd ]
  550. class ModuleBlockHandler(BlockHandler):
  551. allowedTags = [ '@desc', '@see' ]
  552. allowedContexts = [ 'global' ]
  553. takesArg = True
  554. _pat = re.compile('^([\-\w]+)$|^(?:([\-\w.\[\]]+)\s*\n)?\s*(.*)$', re.S);
  555. def parse(self, args):
  556. if len(args) != 1:
  557. raise RuntimeError("You may not pass args (like, {string}) to %s" %
  558. self.tagName)
  559. text = args[0]
  560. m = self._pat.match(text)
  561. if not m:
  562. raise RuntimeError("Malformed args to %s: %s" %
  563. (self.tagName, (text[:20] + "...")))
  564. a = { }
  565. if m.group(1):
  566. a["name"] = m.group(1)
  567. else:
  568. if m.group(2):
  569. a["name"] = m.group(2)
  570. if m.group(3):
  571. a["desc"] = m.group(3)
  572. return a
  573. def attach(self, obj, current, blockType):
  574. if "name" in obj:
  575. current['module'] = obj["name"]
  576. if "desc" in obj:
  577. if "desc" in current:
  578. obj['desc'] = current['desc'] + "\n\n" + obj['desc']
  579. current['desc'] = obj['desc']
  580. def merge(self, doc, parent, guessedName, context):
  581. # first fields that we wish to not overwrite
  582. for f in doc:
  583. if f == 'desc':
  584. parent['desc'] = parent['desc'] + "\n\n" + doc['desc'] if 'desc' in parent else doc['desc']
  585. elif f == "module":
  586. parent['module'] = doc['module']
  587. elif (f in doc and f not in parent):
  588. parent[f] = doc[f]
  589. class FunctionBlockHandler(ModuleBlockHandler):
  590. allowedTags = [ '@see', '@param', '@return', '@throws', '@desc', '@type' ]
  591. allowedContexts = [ 'global', 'class' ]
  592. def attach(self, obj, current, blockType):
  593. if "name" in obj:
  594. current['name'] = obj["name"]
  595. if "desc" in obj:
  596. if "desc" in current:
  597. obj['desc'] = current['desc'] + "\n\n" + obj['desc']
  598. current['desc'] = obj['desc']
  599. def merge(self, doc, parent, guessedName, context):
  600. if "name" not in doc:
  601. doc['name'] = guessedName
  602. if doc['name'] == None:
  603. raise RuntimeError("can't determine function name")
  604. if not "functions" in parent:
  605. parent["functions"] = []
  606. for f in parent["functions"]:
  607. if doc["name"] == f['name']:
  608. raise RuntimeError("function '%s' redefined" % doc["name"])
  609. parent["functions"].append(doc)
  610. class EventBlockHandler(ModuleBlockHandler):
  611. allowedTags = [ '@see', '@desc', '@payload' ]
  612. allowedContexts = [ 'global', 'class' ]
  613. def attach(self, obj, current, blockType):
  614. if "name" in obj:
  615. current['name'] = obj["name"]
  616. if "desc" in obj:
  617. if "desc" in current:
  618. obj['desc'] = current['desc'] + "\n\n" + obj['desc']
  619. current['desc'] = obj['desc']
  620. def merge(self, doc, parent, guessedName, context):
  621. if "name" not in doc:
  622. doc['name'] = guessedName
  623. if doc['name'] == None:
  624. raise RuntimeError("can't determine event name")
  625. if not "events" in parent:
  626. parent["events"] = []
  627. for e in parent["events"]:
  628. if doc["name"] == e['name']:
  629. raise RuntimeError("'%s' event redefined" % doc["name"])
  630. parent["events"].append(doc)
  631. class ConstructorBlockHandler(BlockHandler):
  632. allowedTags = [ '@see', '@param', '@throws', '@desc', '@return', '@type' ]
  633. takesArg = True
  634. argOptional = True
  635. allowedContexts = [ 'class' ]
  636. def attach(self, obj, current, blockType):
  637. if obj:
  638. if "desc" in current:
  639. obj = current['desc'] + "\n\n" + obj
  640. current['desc'] = obj
  641. def merge(self, doc, parent, guessedName, context):
  642. if not "constructors" in parent:
  643. parent["constructors"] = []
  644. parent["constructors"].append(doc)
  645. class ClassBlockHandler(FunctionBlockHandler):
  646. allowedTags = [ '@see', '@desc' ]
  647. def merge(self, doc, parent, guessedName, context):
  648. if "name" not in doc:
  649. doc['name'] = guessedName
  650. return doc
  651. class EndClassBlockHandler(BlockHandler):
  652. allowedContexts = [ 'class' ]
  653. def attach(self, obj, current, blockType):
  654. pass
  655. def merge(self, doc, parent, guessedName, context):
  656. if not "classes" in parent:
  657. parent["classes"] = []
  658. for c in parent["classes"]:
  659. if doc["name"] == c['name']:
  660. raise RuntimeError("class '%s' redefined" % doc["name"])
  661. parent["classes"].append(doc)
  662. class TypedefBlockHandler(FunctionBlockHandler):
  663. allowedTags = [ ]
  664. allowedContexts = [ 'embedded' ]
  665. takesArg = True
  666. _pat = re.compile('^(\w+)$|^(?:([\w.\[\]]+)\s*\n)?\s*(.*)$', re.S);
  667. def parse(self, args):
  668. if len(args) != 1 or self._isType(args[0]):
  669. raise RuntimeError("%s accepts a string argument" % self.tagName)
  670. return args[0]
  671. def attach(self, obj, current, blockType):
  672. current['name'] = obj
  673. def merge(self, doc, parent, guessedName, context):
  674. parent['val'] = doc
  675. class EndTypedefBlockHandler(BlockHandler):
  676. allowedContexts = [ 'type' ]
  677. def attach(self, obj, current, blockType):
  678. pass
  679. def merge(self, doc, parent, guessedName, context):
  680. pass
  681. class PropertyBlockHandler(ParamTagHandler, BlockHandler):
  682. allowedTags = [ '@see', '@throws', '@desc', '@type' ]
  683. allowedContexts = [ 'type', 'class', 'global' ]
  684. def attach(self, obj, current, blockType):
  685. for x in obj:
  686. current[x] = obj[x]
  687. def merge(self, doc, parent, guessedName, context):
  688. if "name" not in doc:
  689. doc['name'] = guessedName
  690. if doc["name"] == None:
  691. raise RuntimeError("can't determine property name")
  692. if not "properties" in parent:
  693. parent["properties"] = []
  694. for p in parent["properties"]:
  695. if doc["name"] == p['name']:
  696. raise RuntimeError("property '%s' redefined" % doc["name"])
  697. if context == "type":
  698. self._handleOptionalSyntax(doc)
  699. parent["properties"].append(doc)
  700. # A type guesser that assumes the first documentation block of a source file is
  701. # probably a '@module' documentation block
  702. def firstBlockIsModuleTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  703. if '@module' in possibilities and firstBlock:
  704. return '@module'
  705. return None
  706. # A type guesser that checks the codeChunk for appearance of the keyword 'function'
  707. _functionKeywordPat = re.compile('(?<!\w)function(?!\w)');
  708. def isFunctionIfKeywordInCodeTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  709. if '@function' in possibilities and _functionKeywordPat.search(codeChunk):
  710. return '@function'
  711. return None
  712. # A type guesser that assumes '@property' based on the presence of @type and the absence of @return.
  713. def typeWithoutReturnsIsProbablyPropertyTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  714. if '@type' in tags and '@return' not in tags and '@property' in possibilities:
  715. return '@property'
  716. return None
  717. # a guesser which assumes if a documentation block occurs before an assignment, its probably a
  718. # property (this is a bit questionable, folks)
  719. _assignmentPat = re.compile('^.*=.*;\s*$', re.M);
  720. def assignmentIsProbablyPropertyTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  721. if '@property' in possibilities and _assignmentPat.match(codeChunk):
  722. return '@property'
  723. return None
  724. _hasGetSetterPat = re.compile('__define[GS]etter__');
  725. def hasGetSetterIsPropertyTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  726. if '@property' in possibilities and _hasGetSetterPat.search(codeChunk):
  727. return '@property'
  728. return None
  729. # A name guesser that looks for exports.XXX and assumes XXX is the name we want
  730. # define the pattern globally in this module so we don't recompile it all the time
  731. _findExportsPat = re.compile('(?:^|\s)exports\.(\w+)\s', re.M);
  732. def commonJSNameGuesser(codeChunk, blockType):
  733. m = _findExportsPat.search(codeChunk)
  734. if m:
  735. return m.group(1)
  736. return None
  737. # A name guesser that catches assignment to properties and guesses the name based
  738. # on that. like `this.foo` or `stream.bar`. Very general, but requires rooting
  739. # at the beginning of line, whereas exports guesser does not
  740. _findPropPat = re.compile('^\s*\w+\.(\w+)\s*=', re.M);
  741. def assignToPropertyNameGuesser(codeChunk, blockType):
  742. m = _findPropPat.search(codeChunk)
  743. if m:
  744. return m.group(1)
  745. return None
  746. _standardFunctionPat = re.compile('^\s*function\s*(\w+)\(.*$');
  747. def standardFunctionNameGuesser(codeChunk, blockType):
  748. m = _standardFunctionPat.match(codeChunk)
  749. if m:
  750. return m.group(1)
  751. return None
  752. _objectPropertyPat = re.compile('^\s*(\w+)\s*:.*$');
  753. def objectPropertyNameGuesser(codeChunk, blockType):
  754. m = _objectPropertyPat.match(codeChunk)
  755. if m:
  756. return m.group(1)
  757. return None
  758. _getSetterNameGuesserPat = re.compile(r'''__define[GS]etter__\s* \( \s* (?:"(\w+)" | '(\w+)') ''', re.X);
  759. def getSetterNameGuesser(codeChunk, blockType):
  760. m = _getSetterNameGuesserPat.search(codeChunk)
  761. if m:
  762. return m.group(1) if m.group(1) else m.group(2)
  763. return None
  764. if __name__ == '__main__':
  765. import sys
  766. import json
  767. ds = DocStract()
  768. docs = None
  769. if len (sys.argv) == 2:
  770. docs = ds.extractFromFile(sys.argv[1])
  771. elif len (sys.argv) == 1:
  772. docs = ds.extract(sys.stdin.read())
  773. else:
  774. print >> sys.stderr, "Usage: docstract [file]"
  775. sys.exit(1)
  776. print json.dumps(docs, indent=2, sort_keys=True) + "\n"